# Setup

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
os.chdir('/content/drive/MyDrive/w266 project/dontpatronizeme/semeval-2022')
os.getcwd()

'/content/drive/MyDrive/w266 project/dontpatronizeme/semeval-2022'

In [3]:
#!pip install alibi
!pip install transformers

Collecting transformers
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 5.1 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 6.7 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 50.3 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 57.6 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 51.9 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml


In [4]:
import pandas as pd
import numpy as np
import ast
import matplotlib.pyplot as plt
import random

from sklearn.metrics import f1_score
import tensorflow as tf
import transformers
from transformers import BertTokenizer, TFBertModel, DistilBertTokenizer, TFDistilBertModel

#import alibi
#from alibi.explainers import IntegratedGradients

import logging
tf.get_logger().setLevel(logging.ERROR)
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [5]:
# helper function to save predictions to an output file
def labels2file(p, outf_path):
	with open(outf_path,'w') as outf:
		for pi in p:
			outf.write(','.join([str(k) for k in pi])+'\n')

# Data

In [6]:
#!wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
#!tar -xf aclImdb_v1.tar.gz

In [7]:
from pathlib import Path

def read_imdb_split(split_dir):
    split_dir = Path(split_dir)
    texts = []
    labels = []
    for label_dir in ["pos", "neg"]:
        for text_file in (split_dir/label_dir).iterdir():
            texts.append(text_file.read_text())
            labels.append(0 if label_dir is "neg" else 1)

    return texts, labels

train_texts, train_labels = read_imdb_split('aclImdb/train')
#test_texts, test_labels = read_imdb_split('aclImdb/test')

In [8]:
import random
# Shuffle two lists with same order
# Using zip() + * operator + shuffle()
temp = list(zip(train_texts, train_labels))
random.shuffle(temp)
res1, res2 = zip(*temp)
# res1 and res2 come out as tuples, and so must be converted to lists.
train_texts, train_labels = list(res1), list(res2)

# Model

In [9]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

max_length = 128 

def tokenize_sentences(sentences):

  return tokenizer(sentences, 
                   max_length=max_length,
                   truncation=True,
                   padding='max_length', 
                   return_tensors='tf')

x_train = tokenize_sentences([str(x) for x in train_texts[:13000]])
x_test = tokenize_sentences([str(x) for x in train_texts[13000:14000]])

y_train = np.array(train_labels[:13000])
y_test = np.array(train_labels[13000:14000])

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [10]:
try:
    del classification_model
except:
    pass

try:
    del bert_model
except:
    pass

tf.keras.backend.clear_session()

# parameter
hidden_size = 256
train_layers = -1
optimizer = tf.keras.optimizers.Adam(5e-5)
#optimizer = tf.keras.optimizers.Adam(lr=5e-5, beta_1=0.91, beta_2=0.999, epsilon=None, decay=0.1, amsgrad=False)


# model & layers
bert_model = TFBertModel.from_pretrained('bert-base-cased')
dense_layer = tf.keras.layers.Dense(hidden_size, name='hidden_layer')
cls_layer = tf.keras.layers.Dense(1, activation='sigmoid', name='classification_layer')

Downloading:   0%|          | 0.00/502M [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [11]:
input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='input_ids_layer')
token_type_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='token_type_ids_layer')
attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='attention_mask_layer')

bert_inputs = {'input_ids': input_ids,
              'token_type_ids': token_type_ids,
              'attention_mask': attention_mask}

#restrict training to the train_layers outer transformer layers
if not train_layers == -1:

        retrain_layers = []

        for retrain_layer_number in range(train_layers):

            layer_code = '_' + str(11 - retrain_layer_number)
            retrain_layers.append(layer_code)

        for w in bert_model.weights:
            if not any([x in w.name for x in retrain_layers]):
                w._trainable = False


bert_out = bert_model(bert_inputs) #, output_hidden_states=True

classification_token = tf.keras.layers.Lambda(lambda x: x[:,0,:], name='get_first_vector')(bert_out[0])
dense = dense_layer(classification_token)
dense = tf.keras.layers.Dropout(rate=0.1)(dense)
classification = cls_layer(dense)

classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask], 
                                      outputs=[classification])

classification_model.compile(optimizer=optimizer,
                        loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                        metrics='accuracy')

In [12]:
# train
for epoch in range(3):
  classification_model.fit([x_train.input_ids, x_train.token_type_ids, x_train.attention_mask], y_train,
                          validation_data=([x_test.input_ids, x_test.token_type_ids, x_test.attention_mask], y_test),
                          epochs=1, batch_size=32)
  
  y_predict_values = classification_model.predict([x_test.input_ids, x_test.token_type_ids, x_test.attention_mask], 
                                          batch_size=32, verbose=1)  
  
  y_predict = [1 if i[0]>0.5 else 0 for i in y_predict_values]
  print('Epoch:', epoch+1, 'F1:', f1_score(y_test, y_predict))

Epoch: 1 F1: 0.8303571428571428
Epoch: 2 F1: 0.8773388773388774
Epoch: 3 F1: 0.8711288711288712


# IG - By Hand

In [22]:
# example
i = 13
sentence = train_texts[i]
sentence = "This movie really sucks. Worst movie."
words = sentence.split()
text_ig = tokenize_sentences(sentence)
tokenized_words = tokenizer.convert_ids_to_tokens(text_ig.input_ids[0]) # for output
embedding = bert_model(text_ig, output_hidden_states=True)[2][0] # hidden_states / 1st layer # checked that it's correct.
sentence

'This movie really sucks. Worst movie.'

In [23]:
# baseline
mask = (text_ig.input_ids<900)
mask_embedding = tf.reshape(tf.repeat(tf.cast(mask, embedding.dtype), 768),[1, max_length, 768])
baseline = tf.multiply(embedding, mask_embedding)

In [None]:
# create model from classification model that takes embeddings
input_embeds = tf.keras.layers.Input(shape=(max_length, 768), dtype=tf.float32, name='input_embeds_layer')
token_type_ids_ig = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='token_type_ids_layer')
attention_mask_ig = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='attention_mask_layer')

bert_inputs_ig = {'inputs_embeds': input_embeds,
                  'token_type_ids': token_type_ids_ig,
                  'attention_mask': attention_mask_ig}

bert_out_ig = bert_model(bert_inputs_ig) #output_hidden_states=True

classification_token_ig = tf.keras.layers.Lambda(lambda x: x[:,0,:], name='get_first_vector')(bert_out_ig[0])
dense_ig = dense_layer(classification_token_ig)
dense_ig = tf.keras.layers.Dropout(rate=0.1)(dense_ig)
classification_ig = cls_layer(dense_ig)

classification_model_ig = tf.keras.Model(inputs=[input_embeds, token_type_ids_ig, attention_mask_ig], 
                                         outputs=[classification_ig])

classification_model_ig.predict([embedding, text_ig.token_type_ids, text_ig.attention_mask])

In [None]:
m_steps = 40
alphas = tf.linspace(start=0.0, stop=1, num=m_steps+1) # Generate m_steps intervals for integral_approximation() below.
#alphas = alphas[1:] # exclude the one with all 0's. those gradients are very big.

def interpolate_embeddings(baseline, embedding, alphas):
  alphas_x = alphas[:, tf.newaxis, tf.newaxis, tf.newaxis]
  baseline_x = tf.expand_dims(baseline, axis=0)
  input_x = tf.expand_dims(embedding, axis=0)
  delta = input_x - baseline_x
  embeddings = baseline_x +  alphas_x * delta
  return embeddings

interpolated_embeddings = interpolate_embeddings(baseline, embedding, alphas)
interpolated_embeddings.shape

In [None]:
step = tf.subtract(interpolated_embeddings[1], interpolated_embeddings[0])

In [None]:
def compute_gradients(embeddings, text_ig):
  with tf.GradientTape() as tape:
    tape.watch(embeddings)
    probs = classification_model_ig([tf.squeeze(embeddings, axis=1),
                                     tf.repeat(text_ig.token_type_ids, embeddings.shape[0], axis=0),
                                     tf.repeat(text_ig.attention_mask, embeddings.shape[0], axis=0)])
  return tape.gradient(probs, embeddings)

path_gradients = compute_gradients(interpolated_embeddings, text_ig)
path_gradients.shape

In [None]:
def integral_approximation(gradients):
  integrated_gradients = tf.math.reduce_mean(gradients * step, axis=[0,3]) 
  return integrated_gradients

attrs = integral_approximation(path_gradients)
attrs.shape

In [None]:
from IPython.display import HTML
import matplotlib as mpl

# Return HTML markup which highlights the text with a desired color.
def  hlstr(string, color='white'):
    return f"<mark style=background-color:{color}>{string} </mark>"

# Calculates color based on attribution values
def colorize(attrs, cmap='PiYG'):
    cmap_bound = np.abs(attrs).max()
    norm = mpl.colors.Normalize(vmin=-cmap_bound, vmax=cmap_bound)
    cmap = mpl.cm.get_cmap(cmap)

    colors = list(map(lambda x: mpl.colors.rgb2hex(cmap(norm(x))), attrs))
    return colors

colors = colorize(attrs[0])
HTML("".join(list(map(hlstr, tokenized_words, colors))))

In [21]:
attrs

<tf.Tensor: shape=(1, 128), dtype=float32, numpy=
array([[ 0.0000000e+00,  3.9660404e-06, -2.1904470e-05, -2.4042600e-05,
        -3.9675182e-05,  0.0000000e+00, -3.3141616e-05, -1.5058831e-05,
        -8.7642047e-06,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.000