In [1]:
!pip install transformers
import tensorflow as tf
from transformers import TFBertForSequenceClassification, BertTokenizer
import tensorflow_datasets as tfds
import numpy as np
from tensorflow.keras import mixed_precision
import pandas as pd

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m85.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m112.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.14.0-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.2/224.2 kB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.0 tokenizers-0.13.3 transformers-4.28.1


# Emotion Detection

In [2]:
## First Task -- Emotion Detection
# Load the data
df_train = pd.read_csv("emotion_train.txt", delimiter=';', header=None, names=['sentence','label'])
df_test = pd.read_csv("emotion_test.txt", delimiter=';', header=None, names=['sentence','label'])

#df_train=df_train.iloc[:100]
#df_test=df_test.iloc[:20]
# Create a BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the sentences
train_encodings = tokenizer(list(df_train['sentence']), truncation=True, padding=True)
test_encodings = tokenizer(list(df_test['sentence']), truncation=True, padding=True)

# Convert labels to integers
labels = {'anger': 0, 'fear': 1, 'joy': 2, 'love': 3, 'sadness': 4, 'surprise': 5}
df_train['label'] = df_train['label'].apply(lambda x: labels[x])
df_test['label'] = df_test['label'].apply(lambda x: labels[x])

# Convert the data to TensorFlow Dataset objects
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), df_train['label']))
test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings), df_test['label']))

# Specify the number of classes
num_classes = len(labels)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [3]:
# Load the BERT model
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_classes)

# Compile the model
policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)

# define optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)
optimizer = mixed_precision.LossScaleOptimizer(optimizer, dynamic=True)

# Define loss and metric
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

# Train the model
model.fit(train_dataset.batch(16), epochs=1)
test_loss, test_accuracy = model.evaluate(test_dataset.batch(16))
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)
# Save the weight of the model for the first task
model.save_weights("task1_weights.h5")

#6mins

Downloading tf_model.h5:   0%|          | 0.00/536M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Test loss: 0.16707246005535126
Test accuracy: 0.9290000200271606


# Sentiment Analysis

In [4]:
## First Task -- Sentiment Analysis
# Load dataset
(ds_train, ds_test), ds_info = tfds.load('imdb_reviews',
                                         split=(tfds.Split.TRAIN, tfds.Split.TEST),
                                         as_supervised=True,
                                         with_info=True)

# Define tokenizer and constants
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
max_length = 512
batch_size = 6

# Take the first 1000 elements of the train dataset
#ds_train = ds_train.take(200)
#ds_test = ds_test.take(100)


Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteYUSX6F/imdb_reviews-train.tfrecord…

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteYUSX6F/imdb_reviews-test.tfrecord*…

Generating unsupervised examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteYUSX6F/imdb_reviews-unsupervised.t…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [5]:
# Define function to convert examples to BERT inputs
def convert_example_to_feature(review):
    encoding = tokenizer.encode_plus(
                 review,
                 add_special_tokens=True,
                 max_length=max_length,
                 pad_to_max_length=True,
                 return_attention_mask=True,
                 return_token_type_ids=True,
                 truncation=True
               )
    return {
        'input_ids': tf.constant(encoding['input_ids']),
        'attention_mask': tf.constant(encoding['attention_mask']),
        'token_type_ids': tf.constant(encoding['token_type_ids'])
    }

# Define function to map examples to dictionary format
def map_example_to_dict(input_ids, attention_masks, token_type_ids, label):
    return {
        "input_ids": input_ids,
        "attention_mask": attention_masks, 
        "token_type_ids": token_type_ids,  
        "labels": label
    }

# Define function to encode examples and create TensorFlow dataset
def encode_examples(ds, limit=-1):
    input_ids_list = []
    token_type_ids_list = []
    attention_mask_list = []
    label_list = []
    if (limit > 0):
        ds = ds.take(limit)
    for review, label in tfds.as_numpy(ds):
        bert_input = convert_example_to_feature(review.decode())
        input_ids_list.append(bert_input['input_ids'])
        token_type_ids_list.append(bert_input['token_type_ids'])
        attention_mask_list.append(bert_input['attention_mask'])
        label_list.append([label])

    return tf.data.Dataset.from_tensor_slices((input_ids_list, attention_mask_list, token_type_ids_list, label_list)).map(map_example_to_dict)

# Encode train and test datasets
ds_train_encoded = encode_examples(ds_train).shuffle(10000).batch(batch_size)
ds_test_encoded = encode_examples(ds_test).batch(batch_size)

# Enable mixed precision training
policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)

# Initialize model and optimizer
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=6)
# define optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)
optimizer = mixed_precision.LossScaleOptimizer(optimizer, dynamic=True)

# Define loss and metric
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')

# Train the model for one epoch without EWC
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
model.fit(ds_train_encoded, epochs=1, validation_data=ds_test_encoded)

# Save the weight of the model for the second task
model.save_weights("task2_weights.h5")

#36mins

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




In [6]:

class EWCRegularizer(tf.keras.regularizers.Regularizer):
    def __init__(self, model, tasks, alpha=0.1):
        self.model = model
        self.tasks = tasks
        self.alpha = alpha
        self.task_names = []
        self.params = []
        self.precision_matrices = []
        
        for task in tasks:
            self.task_names.append(task[0])
            self.model.load_weights(task[1])
            self.params.append(self.model.get_weights())
            self.precision_matrices.append([tf.zeros_like(p) for p in self.params[-1]])
            
    def __call__(self, x):
        regularization_loss = tf.constant(0.0, dtype=tf.float8)
        idx = 0
        for i, layer in enumerate(self.model.layers):
            weights = layer.weights
            for j, weight in enumerate(weights):
                if weight in layer.trainable_weights:
                    mean = self.params[-1][idx]
                    precision_matrix = self.precision_matrices[-1][idx]
                    
                    for k in range(len(self.tasks)-1):
                        prev_params = self.params[k]
                        prev_mean = prev_params[idx]
                        prev_precision_matrix = self.precision_matrices[k][idx]
                        precision_matrix = precision_matrix + (prev_precision_matrix / self.alpha)
                        mean = mean - ((prev_mean - self.params[-1][idx]) / self.alpha)
                        
                    fisher_matrix = tf.square(tf.gradients(layer.output, weight)[0])
                    precision_matrix = precision_matrix + fisher_matrix
                    self.precision_matrices[-1][idx] = precision_matrix
                    mean = tf.cast(mean, dtype=tf.float8)
                    precision_matrix = tf.cast(precision_matrix, dtype=tf.float8)
                    regularization_loss += tf.reduce_sum(precision_matrix * tf.square(weight - mean)) * (self.alpha / 2.0)
                    idx += 1
        return regularization_loss


In [9]:
# create an instance of the EWCRegularizer class
tasks = [("task1", "task1_weights.h5"), ("task2", "task2_weights.h5")]
ewc_regularizer = EWCRegularizer(model, tasks)
num_epochs=5

# Train loop
for epoch in range(num_epochs):
    print(f'Epoch {epoch + 1}/{num_epochs}')
    for step, batch in enumerate(ds_train_encoded):
        # unpack the batch
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        token_type_ids = batch['token_type_ids']  
        
        labels = batch['labels']

        # compute the gradients for the current batch
        with tf.GradientTape() as tape:
            outputs = model(input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask, training=True)
            loss_value = loss(labels, outputs.logits)
        gradients = tape.gradient(loss_value, model.trainable_variables)

        # update the model weights
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # update training metrics
        metric.update_state(labels, outputs.logits)
        train_acc = metric.result().numpy()

        # log progress
        print(f'Training loss (for one batch) at step {step}: {loss_value.numpy():.4f}')
        print(f'Training accuracy (for one batch) at step {step}: {train_acc:.4f}')
        if step == 100:
            break

    # evaluate on test set at end of each epoch
    for batch in ds_test_encoded:
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        token_type_ids = batch['token_type_ids']  
        
        labels = batch['labels']

        # compute the outputs and update test metrics
        outputs = model(input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask, training=False)
        metric.update_state(labels, outputs.logits)

    # log test metrics
    test_acc = metric.result().numpy()
    print(f'Test accuracy at end of epoch {epoch + 1}: {test_acc:.4f}')
    metric.reset_states()

#20mins

Epoch 1/5
Training loss (for one batch) at step 0: 0.1316
Training accuracy (for one batch) at step 0: 0.1451
Training loss (for one batch) at step 1: 0.0945
Training accuracy (for one batch) at step 1: 0.1476
Training loss (for one batch) at step 2: 0.1113
Training accuracy (for one batch) at step 2: 0.1501
Training loss (for one batch) at step 3: 0.0260
Training accuracy (for one batch) at step 3: 0.1527
Training loss (for one batch) at step 4: 0.0620
Training accuracy (for one batch) at step 4: 0.1552
Training loss (for one batch) at step 5: 0.4070
Training accuracy (for one batch) at step 5: 0.1572
Training loss (for one batch) at step 6: 0.0476
Training accuracy (for one batch) at step 6: 0.1596
Training loss (for one batch) at step 7: 0.2057
Training accuracy (for one batch) at step 7: 0.1616
Training loss (for one batch) at step 8: 0.5767
Training accuracy (for one batch) at step 8: 0.1631
Training loss (for one batch) at step 9: 0.0531
Training accuracy (for one batch) at step 

KeyboardInterrupt: ignored