In [1]:
#!pip install datasets 'tensorflow==2.15'
!pip show tensorflow # This notebook was designed for version 2.15 (which seems to be not available), so I'm going to try run it with the default installed version 2.19

Name: tensorflow
Version: 2.19.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: /usr/local/lib/python3.12/dist-packages
Requires: absl-py, astunparse, flatbuffers, gast, google-pasta, grpcio, h5py, keras, libclang, ml-dtypes, numpy, opt-einsum, packaging, protobuf, requests, setuptools, six, tensorboard, termcolor, typing-extensions, wrapt
Required-by: dopamine_rl, tensorflow-text, tensorflow_decision_forests, tf_keras


In [2]:
# Import necessary libraries
from datasets import load_dataset
from transformers import DistilBertTokenizer, TFDistilBertModel
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Dropout, Layer, LayerNormalization, Add, MultiHeadAttention
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import numpy as np

# Step 1: Load the TweetEval datasets
ds_irony = load_dataset("cardiffnlp/tweet_eval", "irony")
ds_stance = load_dataset("cardiffnlp/tweet_eval", "stance_climate")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

irony/train-00000-of-00001.parquet:   0%|          | 0.00/183k [00:00<?, ?B/s]

irony/test-00000-of-00001.parquet:   0%|          | 0.00/54.0k [00:00<?, ?B/s]

irony/validation-00000-of-00001.parquet:   0%|          | 0.00/61.1k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2862 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/784 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/955 [00:00<?, ? examples/s]

stance_climate/train-00000-of-00001.parq(…):   0%|          | 0.00/28.1k [00:00<?, ?B/s]

stance_climate/test-00000-of-00001.parqu(…):   0%|          | 0.00/14.9k [00:00<?, ?B/s]

stance_climate/validation-00000-of-00001(…):   0%|          | 0.00/5.47k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/355 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/169 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/40 [00:00<?, ? examples/s]

In [3]:
# Step 2: Preprocess and Tokenize the Data
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize_data(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

# Tokenize datasets
ds_irony = ds_irony.map(tokenize_data, batched=True)
ds_stance = ds_stance.map(tokenize_data, batched=True)

# Step 3: Align Datasets for Multi-Output Training
def align_datasets(ds1, ds2):
    size = min(len(ds1["train"]["label"]), len(ds2["train"]["label"]))
    ds1_inputs = {
        "input_ids": np.array(ds1["train"]["input_ids"][:size]),
        "attention_mask": np.array(ds1["train"]["attention_mask"][:size])
    }
    ds1_labels = tf.keras.utils.to_categorical(ds1["train"]["label"][:size], num_classes=2)
    ds2_labels = tf.keras.utils.to_categorical(ds2["train"]["label"][:size], num_classes=3)
    return ds1_inputs, ds1_labels, ds2_labels

inputs, labels_irony, labels_stance = align_datasets(ds_irony, ds_stance)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Map:   0%|          | 0/2862 [00:00<?, ? examples/s]

Map:   0%|          | 0/784 [00:00<?, ? examples/s]

Map:   0%|          | 0/955 [00:00<?, ? examples/s]

Map:   0%|          | 0/355 [00:00<?, ? examples/s]

Map:   0%|          | 0/169 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

In [7]:
# Step 4: Custom DistilBERT Layer (Frozen)

# from_pt=True added by Gemini to fix the "TypeError: 'builtins.safe_open' object is not iterable" error
# in the "distilbert_layer = DistilBERTLayer()" line
# It said "It looks like there's a compatibility issue when loading the DistilBERT model with the current
# versions of the libraries. I'll try setting from_pt=True in from_pretrained to load the PyTorch weights,
# which might fix the error.
class DistilBERTLayer(Layer):
    def __init__(self, model_name="distilbert-base-uncased", **kwargs):
        super(DistilBERTLayer, self).__init__(**kwargs)
        self.distilbert = TFDistilBertModel.from_pretrained(model_name, trainable=False, from_pt=True)

    def call(self, inputs):
        input_ids = inputs["input_ids"]
        attention_mask = inputs["attention_mask"]
        outputs = self.distilbert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.last_hidden_state

# Step 5: Custom Attention Block with Residual Connections
def attention_feedforward_block(x, num_heads, feedforward_dim, dropout_rate):
    # Multi-Head Attention
    attention_output = MultiHeadAttention(num_heads=num_heads, key_dim=x.shape[-1])(x, x)
    attention_output = Dropout(dropout_rate)(attention_output)
    attention_output = LayerNormalization(epsilon=1e-6)(x + attention_output)  # Residual connection + normalization

    # Feedforward Network
    ffn_output = Dense(feedforward_dim, activation="relu")(attention_output)
    ffn_output = Dense(x.shape[-1])(ffn_output)  # Match dimension back
    ffn_output = Dropout(dropout_rate)(ffn_output)
    output = LayerNormalization(epsilon=1e-6)(attention_output + ffn_output)  # Residual connection + normalization
    return output

# Added by Gemini to fix the "ValueError: A KerasTensor cannot be used as input to a TensorFlow function.
# A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional
# models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation
# (from the namespaces `keras.layers` and `keras.ops`)" error
# Gemini said: It seems you are trying to use a TensorFlow function on a KerasTensor.
# I will wrap the tf.reduce_mean operation in a custom Keras Layer to resolve this error (the solution was
# simliar to what the error message include as an example of the fix)
# Custom Mean Pooling Layer
class MeanPoolingLayer(Layer):
    def call(self, inputs):
        return tf.reduce_mean(inputs, axis=1)

# Step 6: Build the Multi-Output Model
input_ids = Input(shape=(128,), dtype=tf.int32, name="input_ids")
attention_mask = Input(shape=(128,), dtype=tf.int32, name="attention_mask")
inputs_model = {"input_ids": input_ids, "attention_mask": attention_mask}

# DistilBERT base (frozen)
distilbert_layer = DistilBERTLayer()
bert_output = distilbert_layer(inputs_model)

# [XX]Another a follow up fix from Gemini because the error was still there (see MeanPoolingLayer class comments)
# It said "I apologize that the previous fix didn't resolve the issue. I see that I added the MeanPoolingLayer
# class but didn't replace the direct tf.reduce_mean calls with instances of this layer."
# Also see [XX] below
# Mean pooling
pooled_output = MeanPoolingLayer()(bert_output)

# Add custom attention and feedforward layers with residuals
x = attention_feedforward_block(bert_output, num_heads=4, feedforward_dim=1024, dropout_rate=0.3)
x = attention_feedforward_block(x, num_heads=4, feedforward_dim=1024, dropout_rate=0.3)

# [XX] Part of the Gemini fix see [XX] above
# Mean pooling after additional layers
x = MeanPoolingLayer()(x)

# Dropout for regularization
x = Dropout(0.3)(x)

# Task-specific output heads
irony_output = Dense(2, activation="softmax", name="irony_output")(x)  # Binary classification
stance_output = Dense(3, activation="softmax", name="stance_output")(x)  # Multi-class classification

# Define the model
multi_output_model = Model(inputs=[input_ids, attention_mask], outputs=[irony_output, stance_output])
multi_output_model.summary()

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_transform.weight', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_projector.bias']
- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


In [8]:
# Step 7: Compile the Model
multi_output_model.compile(
    optimizer=Adam(learning_rate=2e-5),
    loss={
        "irony_output": "categorical_crossentropy",
        "stance_output": "categorical_crossentropy"
    },
    metrics={
        "irony_output": "accuracy",
        "stance_output": "accuracy"
    }
)

# Step 8: Train the Model
history = multi_output_model.fit(
    x={"input_ids": inputs["input_ids"], "attention_mask": inputs["attention_mask"]},
    y={"irony_output": labels_irony, "stance_output": labels_stance},
    batch_size=64,
    epochs=20,
    validation_split=0.1
)

Epoch 1/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 6s/step - irony_output_accuracy: 0.5565 - irony_output_loss: 0.8169 - loss: 2.0020 - stance_output_accuracy: 0.4676 - stance_output_loss: 1.1850 - val_irony_output_accuracy: 0.5556 - val_irony_output_loss: 0.8496 - val_loss: 1.7722 - val_stance_output_accuracy: 0.4167 - val_stance_output_loss: 0.9226
Epoch 2/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 635ms/step - irony_output_accuracy: 0.6161 - irony_output_loss: 0.7409 - loss: 1.6815 - stance_output_accuracy: 0.5219 - stance_output_loss: 0.9409 - val_irony_output_accuracy: 0.5278 - val_irony_output_loss: 0.8799 - val_loss: 1.7717 - val_stance_output_accuracy: 0.5833 - val_stance_output_loss: 0.8918
Epoch 3/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 651ms/step - irony_output_accuracy: 0.6190 - irony_output_loss: 0.6574 - loss: 1.7026 - stance_output_accuracy: 0.4860 - stance_output_loss: 1.0449 - val_irony_output_accura

In [9]:
# Step 9: Evaluate the Model
results = multi_output_model.evaluate(
    x={"input_ids": inputs["input_ids"], "attention_mask": inputs["attention_mask"]},
    y={"irony_output": labels_irony, "stance_output": labels_stance}
)
print("Evaluation Results:", results)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 435ms/step - irony_output_accuracy: 0.8831 - irony_output_loss: 0.3045 - loss: 0.7093 - stance_output_accuracy: 0.8396 - stance_output_loss: 0.4325
Evaluation Results: [0.8508831858634949, 0.5100942254066467, 0.5214762091636658, 0.8507042527198792, 0.794366180896759]


In [10]:
# Step 10: Make Predictions
sample_texts = ["This weather is fantastic... not!", "Climate change is real, and we must act now."]
tokenized_inputs = tokenizer(sample_texts, padding="max_length", truncation=True, max_length=128, return_tensors="tf")

predictions = multi_output_model.predict({
    "input_ids": tokenized_inputs["input_ids"],
    "attention_mask": tokenized_inputs["attention_mask"]
})

for i, text in enumerate(sample_texts):
    irony_pred = predictions[0][i].argmax()
    stance_pred = predictions[1][i].argmax()
    print(f"Text: {text}")
    print(f"  Predicted Irony: {'Ironic' if irony_pred == 1 else 'Not Ironic'}")
    print(f"  Predicted Stance: {['Against', 'Neutral', 'Favor'][stance_pred]}")

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
Text: This weather is fantastic... not!
  Predicted Irony: Ironic
  Predicted Stance: Favor
Text: Climate change is real, and we must act now.
  Predicted Irony: Not Ironic
  Predicted Stance: Favor
