<a href="https://colab.research.google.com/github/RyanChen12035/w266-NLP/blob/main/w266_final_project_mode1_pruning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pydot --quiet
!pip install tensorflow-datasets --quiet
!pip install transformers --quiet

In [2]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


In [3]:
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-f5a96a17-f8fb-9640-302b-e8fb1792910a)


In [4]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Embedding, Input, Dense, Lambda, Dropout, Conv1D, GlobalMaxPooling1D, Concatenate, Activation
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
import tensorflow_datasets as tfds
from transformers import BertTokenizer, TFBertModel
from transformers import logging
logging.set_verbosity_error()
import sklearn as sk
import os
from nltk.data import find
import matplotlib.pyplot as plt
import re
from tensorflow.keras.utils import custom_object_scope

In [44]:
train_data, test_data = tfds.load(
    name="imdb_reviews",
    split=('train[:80%]', 'test[80%:]'),
    as_supervised=True)

train_examples, train_labels = next(iter(train_data.batch(20000)))
val_examples, val_labels = next(iter(test_data.batch(5000)))
test_examples, test_labels = next(iter(test_data.batch(1000)))

In [130]:
#allow us to get the hidden layer
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
bert_model = TFBertModel.from_pretrained('bert-base-cased', output_hidden_states=True)
MAX_SEQUENCE_LENGTH = 100

In [116]:
#BERT Tokenization of training and test data
#Embedding size of Bert tokenizer: 768
#Dictionary size of Bert tokenizer: 28,996


train_examples_str = [x.decode('utf-8') for x in train_examples.numpy()]
val_examples_str = [x.decode('utf-8') for x in val_examples.numpy()]
test_examples_str = [x.decode('utf-8') for x in test_examples.numpy()]

#train
bert_train_tokenized = bert_tokenizer(train_examples_str,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')
bert_train_inputs = [bert_train_tokenized.input_ids,
                     bert_train_tokenized.token_type_ids,
                     bert_train_tokenized.attention_mask]
bert_train_labels = np.array(train_labels)

#val
bert_val_tokenized = bert_tokenizer(val_examples_str,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')
bert_val_inputs = [bert_val_tokenized.input_ids,
                     bert_val_tokenized.token_type_ids,
                     bert_val_tokenized.attention_mask]
bert_val_labels = np.array(val_labels)


#test
bert_test_tokenized = bert_tokenizer(test_examples_str,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')
bert_test_inputs = [bert_test_tokenized.input_ids,
                     bert_test_tokenized.token_type_ids,
                     bert_test_tokenized.attention_mask]
bert_test_labels = np.array(test_labels)

In [131]:
#12 layers of transformer
#A drop out layer + dense layer with 100 hidden layer size on top + final layer with sigmoid as activation function

def create_bert_cls_model(bert_base_model,
                          max_sequence_length=MAX_SEQUENCE_LENGTH,
                          hidden_size = 100,
                          dropout=0.3,
                          learning_rate=0.00005,
                          output_cls_tokens=False):
    """
    Build a simple classification model with BERT. Use the CLS Token output for classification purposes.
    """

    bert_base_model.trainable = True #True

    #input layers of BERT, shape (batch, max_sequence_length), model will be fit with bert_train_tokenized
    input_ids = Input(shape=(max_sequence_length,), dtype=tf.int32, name='input_ids')
    token_type_ids = Input(shape=(max_sequence_length,), dtype=tf.int32, name='token_type_ids')
    attention_mask = Input(shape=(max_sequence_length,), dtype=tf.int32, name='attention_mask')

    inputs = [input_ids, token_type_ids, attention_mask]

    #BERT output, last_hidden_state shape (batch, max_sequence_length, embedding dimensions)
    bert_output = bert_base_model(input_ids=input_ids,
                                  token_type_ids=token_type_ids,
                                  attention_mask=attention_mask,
                                  output_hidden_states=output_cls_tokens)

    #Extract the CLS token's output, the embedding representation of first token of every sentence, shape(batch, embedding dimensions)
    cls_token_output = bert_output[0][:, 0, :] # CLS token output from the last layer

    #Add a dropout layer
    x = Dropout(dropout)(cls_token_output)

    #Add a fully connected layer for classification
    x = Dense(hidden_size, activation='relu')(x)

    #Final output layer for classification, assuming it's binary task
    output = Dense(1, activation='sigmoid')(x)


    # CLS output for each layer of transformer
    if output_cls_tokens:
        cls_outputs = [state[:, 0, :] for state in bert_output[2]] # CLS token outputs from all layers
        model_outputs = [output] + cls_outputs

    else:
        model_outputs = output


    #Model complie
    classification_model = Model(inputs=inputs, outputs=model_outputs)
    classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                                 loss='binary_crossentropy',
                                 metrics=['accuracy'])

    return classification_model

"""
bert_output[2]: When the output_hidden_states parameter is set to True, this output provides the hidden states from all layers of the BERT model.
It is a list of tensors, where each tensor corresponds to the hidden states of a specific layer.
The shape of each tensor in this list is (batch_size, sequence_length, hidden_size), similar to bert_output[0], but for each individual layer.
"""


'\nbert_output[2]: When the output_hidden_states parameter is set to True, this output provides the hidden states from all layers of the BERT model.\nIt is a list of tensors, where each tensor corresponds to the hidden states of a specific layer.\nThe shape of each tensor in this list is (batch_size, sequence_length, hidden_size), similar to bert_output[0], but for each individual layer.\n'

In [129]:
tf.keras.backend.clear_session()

In [132]:
#bert_model
bert_cls_model_classification = create_bert_cls_model(bert_model, output_cls_tokens=False)
history_cls_bert= bert_cls_model_classification.fit(bert_train_inputs,
                                                    bert_train_labels,
                                                    epochs=2, #2
                                                    batch_size=8, #8
                                                    validation_data=(bert_val_inputs, bert_val_labels))

Epoch 1/2




Epoch 2/2


In [120]:
#save the model
# Assuming 'bert_cls_model_classification' is your trained model
model_h5_path = "content/sample_data/save/model_finetuned_BERT.h5"  # Replace with your desired path

# Register TFBertModel as a custom object
with custom_object_scope({'TFBertModel': TFBertModel}):
    bert_cls_model_classification.save(model_h5_path)

  saving_api.save_model(


In [133]:
#Model before zeroing out

bert_cls_model_classification.evaluate(bert_test_inputs, bert_test_labels)



[0.3517876863479614, 0.8519999980926514]

In [103]:
# #load the model
# with custom_object_scope({'TFBertModel': TFBertModel}):
#     bert_cls_model_classification = tf.keras.models.load_model(model_h5_path)

In [58]:
original_list = list(range(3072))
np.random.shuffle(original_list)
print(original_list[:307])

masked_neurons_list = [original_list[:307]]*12
len(masked_neurons_list)

[567, 1912, 1377, 2437, 2362, 3063, 1278, 1296, 2234, 2949, 2397, 1573, 240, 88, 2621, 312, 631, 77, 1540, 750, 2126, 1991, 2693, 543, 136, 128, 1662, 1317, 909, 700, 1568, 597, 2142, 614, 1376, 204, 2741, 1390, 2975, 1727, 2707, 503, 415, 235, 187, 3, 1042, 2623, 728, 2668, 346, 2149, 1764, 1240, 954, 1619, 316, 1323, 1330, 1094, 856, 1478, 629, 1640, 2105, 2115, 306, 1587, 1856, 2942, 2720, 2401, 2660, 1947, 310, 2656, 2076, 2569, 655, 1294, 948, 236, 443, 1034, 1694, 1481, 2611, 1719, 1501, 1282, 1236, 1178, 1329, 1071, 2496, 1790, 2783, 1488, 775, 2318, 3070, 1452, 1599, 1672, 1821, 2489, 1314, 1147, 1375, 1995, 2827, 2652, 1074, 124, 1249, 99, 1591, 2493, 968, 1032, 560, 1996, 3000, 297, 2653, 1870, 2470, 157, 843, 2404, 2132, 1223, 893, 2232, 1108, 2202, 2069, 1062, 2182, 2486, 374, 2560, 2873, 168, 2248, 169, 533, 1940, 369, 207, 405, 2319, 477, 647, 1593, 2469, 2218, 1424, 2819, 2348, 740, 1025, 623, 2885, 92, 681, 1492, 2227, 2929, 686, 14, 1383, 1659, 1734, 994, 549, 2415, 28

12

In [135]:
"""
Cosine similiarity >　0.3

=================Input 0====================
Layer 0, Neurons: ['Layer 0 Neuron 75', 'Layer 0 Neuron 239', 'Layer 0 Neuron 283', 'Layer 0 Neuron 307', 'Layer 0 Neuron 489', 'Layer 0 Neuron 537', 'Layer 0 Neuron 2018', 'Layer 0 Neuron 2625', 'Layer 0 Neuron 2670', 'Layer 0 Neuron 2773']
Layer 1, Neurons: ['Layer 1 Neuron 11', 'Layer 1 Neuron 113', 'Layer 1 Neuron 376', 'Layer 1 Neuron 524', 'Layer 1 Neuron 672', 'Layer 1 Neuron 1685', 'Layer 1 Neuron 1766', 'Layer 1 Neuron 1838']
Layer 2, Neurons: ['Layer 2 Neuron 57', 'Layer 2 Neuron 789', 'Layer 2 Neuron 923', 'Layer 2 Neuron 1316', 'Layer 2 Neuron 2719', 'Layer 2 Neuron 2801', 'Layer 2 Neuron 3062']
Layer 3, Neurons: ['Layer 3 Neuron 323', 'Layer 3 Neuron 693', 'Layer 3 Neuron 1469', 'Layer 3 Neuron 1795', 'Layer 3 Neuron 2233']
Layer 4, Neurons: ['Layer 4 Neuron 102', 'Layer 4 Neuron 170', 'Layer 4 Neuron 642', 'Layer 4 Neuron 915', 'Layer 4 Neuron 2116', 'Layer 4 Neuron 2548']
Layer 5, Neurons: ['Layer 5 Neuron 55']
Layer 6, Neurons: ['Layer 6 Neuron 1443', 'Layer 6 Neuron 1506', 'Layer 6 Neuron 1712']
Layer 7, Neurons: []
Layer 8, Neurons: ['Layer 8 Neuron 763']
Layer 9, Neurons: ['Layer 9 Neuron 2528']
Layer 10, Neurons: []
Layer 11, Neurons: ['Layer 11 Neuron 1023']

=================Input 1====================
Layer 0, Neurons: ['Layer 0 Neuron 75', 'Layer 0 Neuron 283', 'Layer 0 Neuron 489', 'Layer 0 Neuron 2625', 'Layer 0 Neuron 2773']
Layer 1, Neurons: ['Layer 1 Neuron 1685', 'Layer 1 Neuron 1766']
Layer 2, Neurons: ['Layer 2 Neuron 789']
Layer 3, Neurons: ['Layer 3 Neuron 693']
Layer 4, Neurons: ['Layer 4 Neuron 170']
Layer 5, Neurons: []
Layer 6, Neurons: ['Layer 6 Neuron 1443']
Layer 7, Neurons: []
Layer 8, Neurons: ['Layer 8 Neuron 763']
Layer 9, Neurons: ['Layer 9 Neuron 2528']
Layer 10, Neurons: []
Layer 11, Neurons: []

=================Input 2====================
Layer 0, Neurons: ['Layer 0 Neuron 75', 'Layer 0 Neuron 239', 'Layer 0 Neuron 283', 'Layer 0 Neuron 307', 'Layer 0 Neuron 489', 'Layer 0 Neuron 537', 'Layer 0 Neuron 2018', 'Layer 0 Neuron 2625', 'Layer 0 Neuron 2670', 'Layer 0 Neuron 2773']
Layer 1, Neurons: ['Layer 1 Neuron 11', 'Layer 1 Neuron 113', 'Layer 1 Neuron 376', 'Layer 1 Neuron 524', 'Layer 1 Neuron 672', 'Layer 1 Neuron 1685', 'Layer 1 Neuron 1766', 'Layer 1 Neuron 1838']
Layer 2, Neurons: ['Layer 2 Neuron 57', 'Layer 2 Neuron 789', 'Layer 2 Neuron 923', 'Layer 2 Neuron 1316', 'Layer 2 Neuron 2719', 'Layer 2 Neuron 2801', 'Layer 2 Neuron 3062']
Layer 3, Neurons: ['Layer 3 Neuron 323', 'Layer 3 Neuron 693', 'Layer 3 Neuron 1469', 'Layer 3 Neuron 1795', 'Layer 3 Neuron 2233']
Layer 4, Neurons: ['Layer 4 Neuron 102', 'Layer 4 Neuron 170', 'Layer 4 Neuron 642', 'Layer 4 Neuron 915', 'Layer 4 Neuron 2116', 'Layer 4 Neuron 2548']
Layer 5, Neurons: ['Layer 5 Neuron 55']
Layer 6, Neurons: ['Layer 6 Neuron 1443', 'Layer 6 Neuron 1506', 'Layer 6 Neuron 1712']
Layer 7, Neurons: []
Layer 8, Neurons: ['Layer 8 Neuron 763']
Layer 9, Neurons: ['Layer 9 Neuron 2528']
Layer 10, Neurons: []
Layer 11, Neurons: ['Layer 11 Neuron 1023']

=================Input 3====================
Layer 0, Neurons: ['Layer 0 Neuron 75', 'Layer 0 Neuron 283', 'Layer 0 Neuron 489', 'Layer 0 Neuron 2625', 'Layer 0 Neuron 2773']
Layer 1, Neurons: ['Layer 1 Neuron 1685', 'Layer 1 Neuron 1766']
Layer 2, Neurons: ['Layer 2 Neuron 789']
Layer 3, Neurons: ['Layer 3 Neuron 693']
Layer 4, Neurons: ['Layer 4 Neuron 170']
Layer 5, Neurons: []
Layer 6, Neurons: ['Layer 6 Neuron 1443']
Layer 7, Neurons: []
Layer 8, Neurons: ['Layer 8 Neuron 763']
Layer 9, Neurons: ['Layer 9 Neuron 2528']
Layer 10, Neurons: []
Layer 11, Neurons: []

=================Input 8====================
Layer 0, Neurons: ['Layer 0 Neuron 75', 'Layer 0 Neuron 239', 'Layer 0 Neuron 283', 'Layer 0 Neuron 307', 'Layer 0 Neuron 489', 'Layer 0 Neuron 537', 'Layer 0 Neuron 765', 'Layer 0 Neuron 1669', 'Layer 0 Neuron 2018', 'Layer 0 Neuron 2625', 'Layer 0 Neuron 2670', 'Layer 0 Neuron 2773']
Layer 1, Neurons: ['Layer 1 Neuron 11', 'Layer 1 Neuron 113', 'Layer 1 Neuron 376', 'Layer 1 Neuron 524', 'Layer 1 Neuron 672', 'Layer 1 Neuron 1685', 'Layer 1 Neuron 1766', 'Layer 1 Neuron 1838']
Layer 2, Neurons: ['Layer 2 Neuron 57', 'Layer 2 Neuron 789', 'Layer 2 Neuron 923', 'Layer 2 Neuron 1316', 'Layer 2 Neuron 2719', 'Layer 2 Neuron 3062']
Layer 3, Neurons: ['Layer 3 Neuron 323', 'Layer 3 Neuron 693', 'Layer 3 Neuron 1469', 'Layer 3 Neuron 1795', 'Layer 3 Neuron 2233']
Layer 4, Neurons: ['Layer 4 Neuron 102', 'Layer 4 Neuron 170', 'Layer 4 Neuron 915']
Layer 5, Neurons: ['Layer 5 Neuron 55']
Layer 6, Neurons: ['Layer 6 Neuron 1506', 'Layer 6 Neuron 1712']
Layer 7, Neurons: []
Layer 8, Neurons: ['Layer 8 Neuron 763']
Layer 9, Neurons: ['Layer 9 Neuron 2528']
Layer 10, Neurons: []
Layer 11, Neurons: ['Layer 11 Neuron 1023']

=================Input 9====================
Layer 0, Neurons: ['Layer 0 Neuron 75', 'Layer 0 Neuron 239', 'Layer 0 Neuron 283', 'Layer 0 Neuron 489', 'Layer 0 Neuron 2625', 'Layer 0 Neuron 2773']
Layer 1, Neurons: ['Layer 1 Neuron 524', 'Layer 1 Neuron 1685', 'Layer 1 Neuron 1766']
Layer 2, Neurons: ['Layer 2 Neuron 789']
Layer 3, Neurons: ['Layer 3 Neuron 693']
Layer 4, Neurons: ['Layer 4 Neuron 170']
Layer 5, Neurons: []
Layer 6, Neurons: ['Layer 6 Neuron 1443']
Layer 7, Neurons: []
Layer 8, Neurons: ['Layer 8 Neuron 763']
Layer 9, Neurons: ['Layer 9 Neuron 2528']
Layer 10, Neurons: []
Layer 11, Neurons: []
"""





#Mask after second layer of FFN
preserved_neuron_list = [[75, 239, 283, 307, 489, 537, 2018, 2625, 2670, 2773, 765, 1669],
                       [11, 113, 376, 524, 672, 1685, 1766, 1838],
                       [57, 789, 923,1316 ,2719, 2801, 3062],
                       [323, 693, 1469, 1795, 2233],
                       [102, 170, 642, 915, 2116, 2548],
                       [55],
                       [1443, 1506, 1712],
                       [],
                       [763],
                       [2528],
                       [],
                       [1023]]

num_neurons = 3072
masks = []

original_list = list(range(3072))
np.random.shuffle(original_list, )
masked_neurons_list = [original_list[:2457]]*12

for i, masked_neurons in enumerate(masked_neurons_list):
    mask = np.ones(num_neurons)
    if masked_neurons not in preserved_neuron_list[i]:
      mask[masked_neurons] = 0
      masks.append(mask)

In [112]:
print(len(masks))
print(masks[0].shape)
print(masks[0].reshape(-1,).shape)
print(masks[0])

12
(3072,)
(3072,)
[0. 0. 0. ... 0. 0. 0.]


In [26]:
layer = 'tf_bert_model/bert/encoder/layer_._0/output/dense/kernel'
layer.split('/')[3].split('_')

['layer', '.', '0']

In [136]:
#zero out -> quantization + sparse matrix pruning
# Assuming bert_model is your pre-trained BERT model
#tf_bert_model/bert/encoder/layer_._0/output/dense/kernel

for var in bert_model.variables:
    if 'output/dense/kernel' in var.name and 'attention' not in var.name:
        # Extract layer number from variable name
        layer_num = int(var.name.split('/')[3].split('_')[2])

        # Get the current weights
        weights = var.numpy()

        # Apply the mask #(3072,)
        mask = masks[layer_num]
        weights *= mask.reshape(-1, 1)  # Reshape mask and apply to weights

        # Assign the modified weights back to the variable
        var.assign(weights)


In [137]:
#to check in if the weights are correctly zero out
for var in bert_model.variables:
    if 'output/dense/kernel' in var.name  and 'attention' not in var.name:  # Checking for the first layer as an example
        print(var.name, var.numpy()[0:5, 0:5])  # Print a small section of the weights

tf_bert_model/bert/encoder/layer_._0/output/dense/kernel:0 [[ 0. -0. -0.  0.  0.]
 [ 0. -0.  0.  0.  0.]
 [-0.  0. -0. -0.  0.]
 [-0. -0. -0.  0.  0.]
 [ 0.  0.  0. -0.  0.]]
tf_bert_model/bert/encoder/layer_._1/output/dense/kernel:0 [[-0.  0.  0. -0.  0.]
 [ 0.  0.  0. -0.  0.]
 [ 0. -0. -0. -0.  0.]
 [-0. -0. -0. -0. -0.]
 [ 0. -0. -0.  0.  0.]]
tf_bert_model/bert/encoder/layer_._2/output/dense/kernel:0 [[-0.  0. -0.  0. -0.]
 [ 0.  0.  0. -0. -0.]
 [ 0. -0.  0. -0.  0.]
 [ 0.  0.  0. -0. -0.]
 [-0. -0. -0. -0. -0.]]
tf_bert_model/bert/encoder/layer_._3/output/dense/kernel:0 [[ 0.  0.  0.  0. -0.]
 [-0. -0.  0. -0.  0.]
 [-0.  0.  0.  0.  0.]
 [-0. -0.  0.  0.  0.]
 [-0. -0. -0.  0. -0.]]
tf_bert_model/bert/encoder/layer_._4/output/dense/kernel:0 [[-0. -0. -0.  0.  0.]
 [ 0. -0.  0.  0.  0.]
 [-0.  0. -0. -0. -0.]
 [-0.  0. -0.  0.  0.]
 [ 0. -0.  0.  0. -0.]]
tf_bert_model/bert/encoder/layer_._5/output/dense/kernel:0 [[ 0. -0. -0. -0. -0.]
 [ 0. -0. -0. -0. -0.]
 [-0. -0.  0. -0. -0

In [138]:
#bert_model has been zeroed out.
bert_cls_model_classification.evaluate(bert_test_inputs, bert_test_labels)

#accuarcy kept the same. after pruning 10% (307) of neurons (zeroing out). --0.843
#test in sequence / directly pruning
#50% (1531) -- 0.822
#80% (2457) -- 0.747 / 0.71
#90% (2764) -- 0.736 / not yet
#95% (2922) -- 0.734 / 0.47

#Impending for further verifying



[0.5675219893455505, 0.718999981880188]

In [125]:
# #load the model
# with custom_object_scope({'TFBertModel': TFBertModel}):
#     bert_cls_model_classification = tf.keras.models.load_model(model_h5_path)
# Can't load the model and pruning it. it's different.

In [None]:
# Parameter Quantization is easier. low cosine similarity -> lower the digit from 32 to 2.
#1. Directly zero out the FFN-> quantization + pruning
#2. Need to check it out if we assign 0. I think tensorflow would ask us to keep the consistency of matrix to float32. so here, zeroed out item is still saved as float32 and not benifits from zeroing out
#3. Need to check if the GPU can benefit  from sparse input. some GPU or powered by well-designed SW can speed up computation of sparse matrix.
#4. Attension layer?