<a href="https://colab.research.google.com/github/RyanChen12035/w266-NLP/blob/main/w266_final_model1_prunning_strategy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pydot --quiet
!pip install tensorflow-datasets --quiet
!pip install transformers --quiet

In [None]:
import tensorflow as tf
# Report how many GPUs TensorFlow can see on this runtime.
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

In [None]:
# Shell command: list the GPUs reported by the NVIDIA driver.
!nvidia-smi -L

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Embedding, Input, Dense, Lambda, Dropout, Conv1D, GlobalMaxPooling1D, Concatenate, Activation
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
import tensorflow_datasets as tfds
from transformers import BertTokenizer, TFBertModel
from transformers import logging
logging.set_verbosity_error()
import sklearn as sk
import os
from nltk.data import find
import matplotlib.pyplot as plt
import re

In [None]:
# Load three DISJOINT slices of IMDB reviews:
#   train      -> first 80% of the official train split
#   validation -> 60%-80% of the official test split
#   test       -> 80%-84% of the official test split
# Previously validation and test were both batched from the same 'test[80%:]' slice, so the
# test reviews were a subset of the validation reviews. The test slice below starts at the
# same offset as before; only the validation slice moved.
train_data, val_data, test_data = tfds.load(
    name="imdb_reviews",
    split=('train[:80%]', 'test[60%:80%]', 'test[80%:84%]'),
    as_supervised=True)

# Materialise each slice as a single (examples, labels) batch.
train_examples, train_labels = next(iter(train_data.batch(20000)))
val_examples, val_labels = next(iter(val_data.batch(5000)))
test_examples, test_labels = next(iter(test_data.batch(1000)))

In [None]:
# Load the pre-trained cased BERT-base tokenizer and encoder.
# output_hidden_states=True is set at load time so the encoder can return the hidden states of
# its layers in addition to the last one.
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
bert_model = TFBertModel.from_pretrained('bert-base-cased', output_hidden_states=True)
# Token length used for truncation/padding in the tokenizer calls and for the model's input shape.
MAX_SEQUENCE_LENGTH = 100

In [None]:
#BERT tokenization of the training, validation and test reviews
#Embedding size of BERT: 768
#Dictionary size of the BERT tokenizer: 28,996


def _decode_reviews(examples):
    """Convert a batch tensor of UTF-8 encoded reviews into a list of Python strings."""
    return [raw.decode('utf-8') for raw in examples.numpy()]


def _tokenize_reviews(texts):
    """
    Tokenize `texts` into TF tensors truncated/padded to MAX_SEQUENCE_LENGTH.

    Returns the tokenizer output together with the
    [input_ids, token_type_ids, attention_mask] list that is fed to the model.
    """
    tokenized = bert_tokenizer(texts,
                               max_length=MAX_SEQUENCE_LENGTH,
                               truncation=True,
                               padding='max_length',
                               return_tensors='tf')
    model_inputs = [tokenized.input_ids,
                    tokenized.token_type_ids,
                    tokenized.attention_mask]
    return tokenized, model_inputs


train_examples_str = _decode_reviews(train_examples)
val_examples_str = _decode_reviews(val_examples)
test_examples_str = _decode_reviews(test_examples)

#train
bert_train_tokenized, bert_train_inputs = _tokenize_reviews(train_examples_str)
bert_train_labels = np.array(train_labels)

#val
bert_val_tokenized, bert_val_inputs = _tokenize_reviews(val_examples_str)
bert_val_labels = np.array(val_labels)

#test
bert_test_tokenized, bert_test_inputs = _tokenize_reviews(test_examples_str)
bert_test_labels = np.array(test_labels)

In [None]:
#BERT-base has 12 transformer layers.
#On top of the CLS output: a dropout layer, a dense layer with 100 hidden units, and a final dense layer with a sigmoid activation.

def create_bert_cls_model(bert_base_model,
                          max_sequence_length=MAX_SEQUENCE_LENGTH,
                          hidden_size = 100,
                          dropout=0.3,
                          learning_rate=0.00005,
                          output_cls_tokens=False):
    """
    Build and compile a binary classifier that reads the CLS position of a BERT encoder.

    The encoder passed in is marked trainable and is shared by reference, so training the
    returned model updates `bert_base_model` itself. The Dropout/Dense head layers are created
    anew on every call.

    Args:
        bert_base_model: BERT encoder, called with input_ids / token_type_ids / attention_mask.
        max_sequence_length: token length of each of the three integer inputs.
        hidden_size: number of units of the ReLU dense layer in the head.
        dropout: dropout rate applied to the CLS vector before the head.
        learning_rate: learning rate of the Adam optimizer.
        output_cls_tokens: when True, the encoder is asked for its hidden states and the
            model additionally outputs the CLS vector of each of them.

    Returns:
        A compiled Keras Model. Its output is the sigmoid prediction, or, when
        `output_cls_tokens` is True, a list of [prediction, cls_vector_0, cls_vector_1, ...].
    """

    # The whole encoder is fine-tuned together with the head.
    bert_base_model.trainable = True

    def _token_input(name):
        # One integer input of shape (batch, max_sequence_length).
        return Input(shape=(max_sequence_length,), dtype=tf.int32, name=name)

    input_ids = _token_input('input_ids')
    token_type_ids = _token_input('token_type_ids')
    attention_mask = _token_input('attention_mask')

    # Element 0 of the encoder output is the last hidden state,
    # shape (batch, max_sequence_length, embedding dimensions).
    encoder_outputs = bert_base_model(input_ids=input_ids,
                                      token_type_ids=token_type_ids,
                                      attention_mask=attention_mask,
                                      output_hidden_states=output_cls_tokens)

    # Position 0 of every sequence is the CLS token: shape (batch, embedding dimensions).
    final_cls = encoder_outputs[0][:, 0, :]

    # Classification head: dropout -> ReLU dense -> single sigmoid unit (binary task).
    head = Dropout(dropout)(final_cls)
    head = Dense(hidden_size, activation='relu')(head)
    probability = Dense(1, activation='sigmoid')(head)

    model_outputs = probability
    if output_cls_tokens:
        # Element 2 of the encoder output holds the per-layer hidden states; keep only their CLS position.
        model_outputs = [probability] + [hidden[:, 0, :] for hidden in encoder_outputs[2]]

    # Assemble and compile the model.
    classification_model = Model(inputs=[input_ids, token_type_ids, attention_mask],
                                 outputs=model_outputs)
    adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    classification_model.compile(optimizer=adam,
                                 loss='binary_crossentropy',
                                 metrics=['accuracy'])

    return classification_model

"""
bert_output[2]: When the output_hidden_states parameter is set to True, this output provides the hidden states from all layers of the BERT model.
It is a list of tensors, where each tensor corresponds to the hidden states of a specific layer.
The shape of each tensor in this list is (batch_size, sequence_length, hidden_size), similar to bert_output[0], but for each individual layer.
"""


In [None]:
# Reset Keras' global state before building the classification model.
tf.keras.backend.clear_session()

In [None]:
# Fine-tune BERT together with a new classification head on the training reviews.
# create_bert_cls_model marks bert_model as trainable and wraps the same object, so bert_model
# itself carries the fine-tuned weights after this cell.
bert_cls_model_classification = create_bert_cls_model(bert_model, output_cls_tokens=False)
history_cls_bert= bert_cls_model_classification.fit(bert_train_inputs,
                                                    bert_train_labels,
                                                    epochs=2, #2
                                                    batch_size=8, #8
                                                    validation_data=(bert_val_inputs, bert_val_labels))

In [None]:
# Baseline: evaluate the fine-tuned model on the test inputs before any weights are zeroed out.

bert_cls_model_classification.evaluate(bert_test_inputs, bert_test_labels)

In [None]:
import time

# Baseline inference latency over the test inputs (before any weights are zeroed out).
# perf_counter() is a monotonic, high-resolution clock meant for measuring intervals;
# time.time() follows the wall clock and can jump if the system time is adjusted.
start_time = time.perf_counter()
prediction = bert_cls_model_classification.predict(bert_test_inputs)
end_time = time.perf_counter()

elapsed_time = end_time - start_time
print("Elapsed time: {:.2f} seconds".format(elapsed_time))

In [None]:
# Example test reviews
"""
1. Identifying Emotional Tone
Sub-Task: Determine the emotional tone of the review (e.g., positive, negative, neutral).
Test Reviews:
    EX1 "The movie's breathtaking scenery and exceptional soundtrack added depth to its rich storytelling." -> Positive Tone
    EX2 "The film was a letdown with its lackluster plot and uninspired performances." -> Negative Tone
2. Analyzing Subjective Statements
Sub-Task: Detect subjective statements or opinions in the review.
Test Reviews:
    EX3 "In my opinion, the film's portrayal of historical events was highly inaccurate." -> Subjective
    EX4 "The movie won three Academy Awards, including Best Picture." -> Objective
3. Evaluating Specific Aspects (Acting, Plot, Cinematography)
Sub-Task: Assess specific aspects of the movie like acting quality, plot development, and cinematography.
Test Reviews:
    EX5 "The acting was superb, with each character bringing depth and emotion to the screen." -> Positive Acting
    EX6 "The plot was predictable and lacked originality, making the movie quite boring." -> Negative Plot
4. Recognizing Extremes in Sentiment
Sub-Task: Identify reviews with extremely positive or negative sentiments.
Test Reviews:
    EX7 "This is possibly the worst movie ever made, with no redeeming qualities whatsoever." -> Extremely Negative
    EX8 "An absolute masterpiece, every moment was captivating and a joy to watch." -> Extremely Positive
5. Detecting Sarcasm or Irony
Sub-Task: Detect sarcasm or irony, which can often invert the apparent sentiment of a statement.
Test Reviews:
    EX9 "Oh great, another predictable rom-com, just what the world needs." -> Sarcasm
    EX10 "I loved how the movie ended abruptly without resolving any plot points." -> Irony
"""

# Index k holds example EX(k+1) from the list above: the first review has a positive tone and
# the second a negative tone. (The first two slots previously repeated EX3/EX4.)
test_reviews = [
    "The movie's breathtaking scenery and exceptional soundtrack added depth to its rich storytelling.",
    "The film was a letdown with its lackluster plot and uninspired performances.",
    "In my opinion, the film's portrayal of historical events was highly inaccurate.",
    "The movie won three Academy Awards, including Best Picture.",
    "The acting was superb, with each character bringing depth and emotion to the screen.",
    "The plot was predictable and lacked originality, making the movie quite boring.",
    "This is possibly the worst movie ever made, with no redeeming qualities whatsoever.",
    "An absolute masterpiece, every moment was captivating and a joy to watch.",
    "Oh great, another predictable rom-com, just what the world needs.",
    "I loved how the movie ended abruptly without resolving any plot points."
]

# Tokenize the reviews
# 101:[CLS], 102:[SEP]
token_inputs = bert_tokenizer(test_reviews,
                            max_length=MAX_SEQUENCE_LENGTH,
                            truncation=True,
                            padding='max_length',
                            return_tensors='tf')

# Same [input_ids, token_type_ids, attention_mask] ordering the model's inputs use.
inputs = [token_inputs.input_ids,
        token_inputs.token_type_ids,
        token_inputs.attention_mask]

print(inputs)

In [None]:
# Create a model for analysis which also outputs the CLS vector of every hidden state.
# It wraps the SAME bert_model object, which was fine-tuned in place above, so no re-training is needed.
cls_layer_inside_finetunedBERT = create_bert_cls_model(bert_model, output_cls_tokens=True)

# The first output is the sigmoid prediction; the remaining outputs are the CLS vectors, one per hidden state.
# NOTE(review): create_bert_cls_model builds new Dense head layers on every call, so `predictions`
# here comes from an untrained head; only `cls_hidden_states` reflects the fine-tuned encoder.
predictions, *cls_hidden_states = cls_layer_inside_finetunedBERT.predict(inputs)
print(cls_hidden_states)

In [None]:
import numpy as np
from numpy.linalg import norm

def calculate_similarities(cls_outputs):
    """
    Compare each layer's CLS vector with the final layer's CLS vector, example by example.

    Args:
        cls_outputs: sequence of per-layer arrays, each indexed as [example][feature].
            The last entry is used as the reference (final layer).

    Returns:
        (cosine_similarities, dot_products): two arrays of shape (num_examples, num_layers);
        entry [j, i] compares layer i with the final layer for example j.
    """
    layer_count = len(cls_outputs)
    example_count = cls_outputs[0].shape[0]
    reference = cls_outputs[-1]

    cosine_similarities = np.zeros((example_count, layer_count))
    dot_products = np.zeros((example_count, layer_count))

    for layer_idx, layer_cls in enumerate(cls_outputs):
        for example_idx in range(example_count):
            vector = layer_cls[example_idx]
            target = reference[example_idx]

            # The raw dot product is stored as-is and also reused as the cosine numerator.
            inner = np.dot(vector, target)
            dot_products[example_idx, layer_idx] = inner
            cosine_similarities[example_idx, layer_idx] = inner / (norm(vector) * norm(target))

    return cosine_similarities, dot_products


# calculate_similarities returns a (cosine_similarities, dot_products) tuple. Unpack it so that
# cosine_similarities_layer is the (num_examples, num_layers) array that pruning_strategy
# indexes as [example][layer]; keeping the tuple would make that lookup raise IndexError.
cosine_similarities_layer, dot_products_layer = calculate_similarities(cls_hidden_states)

In [None]:
# Print the name and shape of every variable in the BERT encoder
# (handy for locating the FFN kernel names referenced in the cells below).
for var in bert_model.variables:
    print(f"{var.name}: {var.shape}")

In [None]:
def extract_ffn_second_dense_weights(bert_model):
    """
    Extracts the kernel weights from the second dense layer of the FFN in each transformer layer of the BERT model.

    Variables are matched on the tail of their name
    (".../encoder/layer_._<n>/output/dense/kernel", with or without a trailing ":0"), so the
    lookup does not depend on the model-name prefix (e.g. "tf_bert_model" vs "tf_bert_model_1").
    The attention-output kernels (".../attention/output/dense/kernel") do not match this tail.

    Args:
        bert_model: object exposing `config.num_hidden_layers` and `variables`, where every
            variable has a `.name` string and a `.numpy()` method.

    Returns:
        A list with exactly one kernel per transformer layer, ordered by layer index.

    Raises:
        ValueError: if no matching kernel is found for some layer. Callers index the returned
            list by layer number, so a silently shorter list would misalign every later layer.
    """
    ffn_weights = []

    for layer_num in range(bert_model.config.num_hidden_layers):
        # The leading "/encoder/" and the "/" right after the layer number keep layer 1 from
        # matching layer 10/11.
        name_tail = f"/encoder/layer_._{layer_num}/output/dense/kernel"

        for var in bert_model.variables:
            # Ignore the ":0" output suffix when comparing.
            if var.name.split(":")[0].endswith(name_tail):
                ffn_weights.append(var.numpy())  # Convert to numpy array
                break  # Move to the next layer once the weights are found
        else:
            raise ValueError(
                f"No FFN output dense kernel found for transformer layer {layer_num} "
                f"(expected a variable name ending in '{name_tail}')"
            )

    return ffn_weights

# FFN output-dense kernels of bert_model, collected in layer order.
ffn_weights = extract_ffn_second_dense_weights(bert_model)

In [None]:
# CLS vector of the first test review (index 0, EX1) taken from the last hidden state (index 12).
# cls_hidden_states is indexed as [hidden state][example][embedding dimension].
cls_token_output = cls_hidden_states[12][0][:]

def compute_cosine_similarities(cls_token_output, ffn_weights):
    """
    Cosine similarity between one CLS vector and every neuron of each FFN kernel.

    Args:
        cls_token_output: 1-D CLS vector.
        ffn_weights: list of 2-D kernels, one per layer; each ROW of a kernel is treated as
            one neuron's weight vector and must have the same length as `cls_token_output`.

    Returns:
        A list with one 1-D array per kernel; entry k is the cosine similarity between the
        CLS vector and the k-th row of that kernel.
    """
    # Unit-length CLS vector, so the dot products below are cosines.
    unit_cls = cls_token_output / np.linalg.norm(cls_token_output)

    def _layer_cosines(kernel):
        # Put neurons in columns, e.g. (3072, 768) -> (768, 3072), to line up with the CLS vector.
        neurons = kernel.T
        # Scale every neuron (column) to unit length.
        unit_neurons = neurons / np.linalg.norm(neurons, axis=0)
        return np.dot(unit_cls, unit_neurons)

    return [_layer_cosines(kernel) for kernel in ffn_weights]

# 'cls_token_output' is the CLS token output of shape (768,),
# and 'ffn_weights' is a list of arrays, each of shape (3072, 768).
# The result is a list with one array of per-neuron cosine similarities per layer.
cosine_similarities_neurons = compute_cosine_similarities(cls_token_output, ffn_weights)

print(cosine_similarities_neurons)

In [None]:
# Pruning strategy (uses the layer similarities of input 5).
# Inputs: cosine_similarities_neurons, cosine_similarities_layer, alpha
# Output: masked_neurons_list
# Idea: if the CLS output of layer N is already close to that of the last layer, layer N holds
# the information needed for the decision and its neurons handle the task properly.
# Neurons with low similarity in such a layer can therefore be removed, and the bar can be set
# slightly higher.
# Cosine similarity can be negative: 0 means irrelevant; the range is -1~0 and 0~1.

# Per-layer neuron indices that should be kept (one inner list per transformer layer).
# NOTE(review): how these indices were selected is not shown in this notebook.
preserved_neuron_list = [[75, 239, 283, 307, 489, 537, 2018, 2625, 2670, 2773, 765, 1669],
                       [11, 113, 376, 524, 672, 1685, 1766, 1838],
                       [57, 789, 923,1316 ,2719, 2801, 3062],
                       [323, 693, 1469, 1795, 2233],
                       [102, 170, 642, 915, 2116, 2548],
                       [55],
                       [1443, 1506, 1712],
                       [],
                       [763],
                       [2528],
                       [],
                       [1023]]


def pruning_strategy(cosine_similarities_neurons, cosine_similarities_layer, preserved_neuron_list, alpha, example_index=5):
    """
    Select, for each of the 12 layers, the neuron indices that are candidates for zeroing out.

    The per-layer threshold is `cosine_similarities_layer[example_index][layer] * alpha`:
      * layers 0-8, even: neurons whose similarity lies strictly inside (-1.2*t, 1.2*t)
      * layers 0-8, odd:  neurons whose similarity lies strictly inside (-0.8*t, 0.8*t)
      * layers 9-11:      neurons whose similarity is below 3*t (one-sided)
    The threshold and the number of selected neurons are printed for every layer.

    Args:
        cosine_similarities_neurons: per-layer sequences of neuron similarities (at least 12).
        cosine_similarities_layer: 2-D structure indexed as [example][layer].
        preserved_neuron_list: accepted for interface compatibility; it is NOT consulted here,
            so the returned lists may still contain preserved indices.
        alpha: scale factor applied to the layer similarity to obtain the threshold.
        example_index: row of `cosine_similarities_layer` to read (default 5, the value that
            was previously hard-coded).

    Returns:
        A list of 12 lists of neuron indices, one per layer.

    NOTE(review): if the layer similarity is negative the two-sided bands are empty — confirm
    whether the absolute value was intended.
    NOTE(review): if cosine_similarities_layer was built from hidden states that include the
    embedding output, column `layer` may be offset by one from transformer layer `layer` — TODO confirm.
    """
    masked_neurons_list = [[] for _ in range(12)]

    for layer in range(12):
        layer_similarity = cosine_similarities_layer[example_index][layer]
        neuron_similarities = cosine_similarities_neurons[layer]

        if layer >= 9:
            # layer 9,10,11 -> very tight: everything below the (scaled) threshold is selected
            upper = 3 * layer_similarity * alpha
            masked_neurons_list[layer] = [num for num, simi in enumerate(neuron_similarities)
                                          if simi < upper]
        else:
            # even layers use the wider band (1.2), odd layers the narrower one (0.8)
            # (`%` is the modulo operator; the previous `%%` was a syntax error)
            scale = 1.2 if layer % 2 == 0 else 0.8
            bound = scale * layer_similarity * alpha
            masked_neurons_list[layer] = [num for num, simi in enumerate(neuron_similarities)
                                          if bound > simi > -bound]

        print(f"Threshold of pruning {layer_similarity*alpha}")
        print(f"number of neruons being masked in layer {layer}: {len(masked_neurons_list[layer])}")

    return masked_neurons_list

# Select the per-layer prune candidates with alpha=0.1 and print the resulting index lists.
masked_neurons_list = pruning_strategy(cosine_similarities_neurons, cosine_similarities_layer,preserved_neuron_list, alpha=0.1)
print(masked_neurons_list)

In [None]:
#zero out the weights except preserved neurons.
preserved_neuron_list = [[75, 239, 283, 307, 489, 537, 2018, 2625, 2670, 2773, 765, 1669],
                       [11, 113, 376, 524, 672, 1685, 1766, 1838],
                       [57, 789, 923,1316 ,2719, 2801, 3062],
                       [323, 693, 1469, 1795, 2233],
                       [102, 170, 642, 915, 2116, 2548],
                       [55],
                       [1443, 1506, 1712],
                       [],
                       [763],
                       [2528],
                       [],
                       [1023]]

num_neurons = 3072
masks = []

# Build one 0/1 mask per layer: 0 for neurons to zero out, 1 for neurons to keep.
# The previous test `masked_neurons not in preserved_neuron_list[i]` compared the whole list
# against individual ints and was always True, so preserved neurons were never kept.
for i, masked_neurons in enumerate(masked_neurons_list):
    mask = np.ones(num_neurons)
    preserved = set(preserved_neuron_list[i])
    # Drop the preserved indices from this layer's prune candidates.
    prunable = [neuron for neuron in masked_neurons if neuron not in preserved]
    mask[prunable] = 0
    # Always append, so that masks[k] is the mask of layer k.
    masks.append(mask)

In [None]:
# Zero out -> (later) quantization + sparse matrix pruning.
# bert_model is the pre-trained BERT encoder; its weights are modified in place here.
# Target variables look like: tf_bert_model/bert/encoder/layer_._0/output/dense/kernel

for variable in bert_model.variables:
    # Only the FFN output kernels are touched; the attention output kernels are skipped.
    is_ffn_output_kernel = 'output/dense/kernel' in variable.name and 'attention' not in variable.name
    if not is_ffn_output_kernel:
        continue

    # The 4th path component is "layer_._<n>"; its last "_"-separated piece is the layer number.
    layer_component = variable.name.split('/')[3]
    layer_index = int(layer_component.split('_')[2])

    pruned_kernel = variable.numpy()

    # The mask has shape (3072,); as a column it zeroes whole rows of the kernel.
    neuron_mask = masks[layer_index].reshape(-1, 1)
    # In-place multiply keeps the kernel's own dtype.
    pruned_kernel *= neuron_mask

    # Write the masked kernel back into the model.
    variable.assign(pruned_kernel)


In [None]:
# bert_model's FFN output kernels have been zeroed out in place. bert_cls_model_classification
# wraps that same bert_model object, so this evaluates the pruned model.
bert_cls_model_classification.evaluate(bert_test_inputs, bert_test_labels)

# Results to record: accuracy and processing time (1000 inputs)



# Pending further verification

In [None]:
import time

# Inference latency over the test inputs after the FFN kernels were zeroed out.
# perf_counter() is a monotonic, high-resolution clock meant for measuring intervals;
# time.time() follows the wall clock and can jump if the system time is adjusted.
start_time = time.perf_counter()
prediction = bert_cls_model_classification.predict(bert_test_inputs)
end_time = time.perf_counter()

elapsed_time = end_time - start_time
print("Elapsed time: {:.2f} seconds".format(elapsed_time))