In [None]:
!pip install transformers datasets  'tensorflow==2.15'

In [None]:
# Import necessary libraries
from datasets import load_dataset
from transformers import DistilBertTokenizer, TFDistilBertModel
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Dropout, Layer, LayerNormalization, Add, MultiHeadAttention
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import numpy as np

# Step 1: Load the TweetEval datasets
# Both tasks are configurations of the same "cardiffnlp/tweet_eval" repository;
# only the configuration name differs.
ds_irony, ds_stance = (
    load_dataset("cardiffnlp/tweet_eval", config_name)
    for config_name in ("irony", "stance_climate")
)


In [None]:

# Step 2: Preprocess and Tokenize the Data
# The tokenizer is loaded from the same checkpoint name that the encoder layer
# uses by default, so token ids match the pretrained vocabulary.
tokenizer = DistilBertTokenizer.from_pretrained(
    pretrained_model_name_or_path="distilbert-base-uncased",
)

def tokenize_data(batch):
    """Tokenize the ``"text"`` entry of ``batch`` with the module-level ``tokenizer``.

    The tokenizer is asked for max-length padding and truncation with a maximum
    length of 128. Whatever the tokenizer call returns is returned unchanged.
    """
    tokenizer_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    texts = batch["text"]
    return tokenizer(texts, **tokenizer_options)

# Tokenize datasets
# batched=True passes tokenize_data a batch of rows per call rather than one row.
ds_irony, ds_stance = (
    dataset.map(tokenize_data, batched=True)
    for dataset in (ds_irony, ds_stance)
)

# Step 3: Align Datasets for Multi-Output Training
def align_datasets(ds1, ds2, split="train", num_classes1=2, num_classes2=3):
    """Build model inputs from ``ds1`` plus one-hot labels from both datasets.

    Both datasets are cut to the length of the shorter one so that all returned
    arrays share the same first dimension.

    Args:
        ds1: Mapping from split name to a tokenized split exposing
            ``"input_ids"``, ``"attention_mask"`` and ``"label"`` columns.
            This dataset supplies the features.
        ds2: Mapping from split name to a split exposing a ``"label"`` column.
        split: Name of the split read from both datasets.
        num_classes1: Width of the one-hot encoding of the ``ds1`` labels.
        num_classes2: Width of the one-hot encoding of the ``ds2`` labels.

    Returns:
        ``(inputs, labels1, labels2)`` where ``inputs`` is a dict holding the
        ``"input_ids"`` and ``"attention_mask"`` arrays of ``ds1`` and the two
        label arrays are one-hot encoded.

    NOTE(review): features come from ``ds1`` only, and the ``ds2`` labels are
    paired with them purely by row position. Unless row *i* of both datasets is
    the same text, ``labels2`` does not describe the returned inputs - confirm
    this pairing is intended before trusting the second head.
    """
    size = min(len(ds1[split]), len(ds2[split]))

    # Slice rows first so only `size` rows are materialised, instead of loading
    # whole columns and discarding everything past `size`.
    rows1 = ds1[split][:size]
    rows2 = ds2[split][:size]

    ds1_inputs = {
        "input_ids": np.array(rows1["input_ids"]),
        "attention_mask": np.array(rows1["attention_mask"]),
    }
    ds1_labels = tf.keras.utils.to_categorical(rows1["label"], num_classes=num_classes1)
    ds2_labels = tf.keras.utils.to_categorical(rows2["label"], num_classes=num_classes2)
    return ds1_inputs, ds1_labels, ds2_labels

# Features come from the irony dataset; one label array is produced per task.
(inputs, labels_irony, labels_stance) = align_datasets(ds1=ds_irony, ds2=ds_stance)


In [None]:

# Step 4: Custom DistilBERT Layer (Frozen)
class DistilBERTLayer(Layer):
    """Keras layer wrapping a pretrained DistilBERT encoder with frozen weights.

    Args:
        model_name: Checkpoint identifier passed to
            ``TFDistilBertModel.from_pretrained``.
        **kwargs: Forwarded to the base ``Layer`` constructor (e.g. ``name``).
    """

    def __init__(self, model_name="distilbert-base-uncased", **kwargs):
        super().__init__(**kwargs)
        self.model_name = model_name
        self.distilbert = TFDistilBertModel.from_pretrained(model_name)
        # Freeze through the Keras attribute so the encoder weights are excluded
        # from training regardless of how from_pretrained treats extra kwargs.
        self.distilbert.trainable = False

    def call(self, inputs):
        """Run the encoder and return its ``last_hidden_state``.

        Args:
            inputs: Dict with ``"input_ids"`` and ``"attention_mask"`` tensors.
        """
        outputs = self.distilbert(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
        )
        return outputs.last_hidden_state

    def get_config(self):
        """Return the layer config including ``model_name`` so the layer can be re-created."""
        config = super().get_config()
        config.update({"model_name": self.model_name})
        return config

# Step 5: Custom Attention Block with Residual Connections
def attention_feedforward_block(x, num_heads, feedforward_dim, dropout_rate, attention_mask=None):
    """Apply one self-attention + feed-forward block with residuals and layer norm.

    Args:
        x: Symbolic tensor used as both query and value; its last dimension is
            the feature width and is preserved by the block.
        num_heads: Number of attention heads.
        feedforward_dim: Width of the hidden ReLU layer of the feed-forward part.
        dropout_rate: Dropout rate applied after attention and after the
            feed-forward projection.
        attention_mask: Optional boolean mask forwarded to
            ``MultiHeadAttention``. ``None`` (the default) applies no masking.

    Returns:
        Tensor with the same last dimension as ``x``.
    """
    hidden_dim = x.shape[-1]

    # Multi-Head Attention (each head uses the full feature width as key_dim)
    attention_output = MultiHeadAttention(num_heads=num_heads, key_dim=hidden_dim)(
        x, x, attention_mask=attention_mask
    )
    attention_output = Dropout(dropout_rate)(attention_output)
    # Residual connection + normalization
    attention_output = LayerNormalization(epsilon=1e-6)(Add()([x, attention_output]))

    # Feedforward Network
    ffn_output = Dense(feedforward_dim, activation="relu")(attention_output)
    ffn_output = Dense(hidden_dim)(ffn_output)  # Match dimension back
    ffn_output = Dropout(dropout_rate)(ffn_output)
    # Residual connection + normalization
    return LayerNormalization(epsilon=1e-6)(Add()([attention_output, ffn_output]))


# Step 6: Build the Multi-Output Model
input_ids = Input(shape=(128,), dtype=tf.int32, name="input_ids")
attention_mask = Input(shape=(128,), dtype=tf.int32, name="attention_mask")
inputs_model = {"input_ids": input_ids, "attention_mask": attention_mask}

# DistilBERT base (frozen)
distilbert_layer = DistilBERTLayer()
bert_output = distilbert_layer(inputs_model)

# Inputs are padded to a fixed length of 128, so most positions of a short text
# are padding. Build a (batch, query, key) mask that only lets real tokens
# interact, so padding neither receives nor contributes attention.
token_is_real = tf.cast(attention_mask, tf.bool)
self_attention_mask = tf.logical_and(
    tf.expand_dims(token_is_real, axis=2),  # query positions
    tf.expand_dims(token_is_real, axis=1),  # key positions
)

# Add custom attention and feedforward layers with residuals
x = attention_feedforward_block(
    bert_output, num_heads=4, feedforward_dim=1024, dropout_rate=0.3,
    attention_mask=self_attention_mask,
)
x = attention_feedforward_block(
    x, num_heads=4, feedforward_dim=1024, dropout_rate=0.3,
    attention_mask=self_attention_mask,
)

# Masked mean pooling after additional layers: average over real tokens only.
token_weights = tf.cast(tf.expand_dims(attention_mask, axis=-1), x.dtype)
# Guard against division by zero for a row whose mask is all zeros.
token_counts = tf.maximum(tf.reduce_sum(token_weights, axis=1), 1.0)
x = tf.reduce_sum(x * token_weights, axis=1) / token_counts

# Dropout for regularization
x = Dropout(0.3)(x)

# Task-specific output heads
irony_output = Dense(2, activation="softmax", name="irony_output")(x)  # Binary classification
stance_output = Dense(3, activation="softmax", name="stance_output")(x)  # Multi-class classification

# Define the model
multi_output_model = Model(inputs=[input_ids, attention_mask], outputs=[irony_output, stance_output])
multi_output_model.summary()


In [None]:

# Step 7: Compile the Model
# Both heads end in a softmax and are trained against one-hot targets, so each
# gets categorical cross-entropy as loss and accuracy as metric, keyed by the
# name of its output layer.
multi_output_model.compile(
    optimizer=Adam(learning_rate=2e-5),
    loss=dict(
        irony_output="categorical_crossentropy",
        stance_output="categorical_crossentropy",
    ),
    metrics=dict(irony_output="accuracy", stance_output="accuracy"),
)

# Step 8: Train the Model
# validation_split=0.1 holds out a tenth of the arrays passed here for validation.
history = multi_output_model.fit(
    x={key: inputs[key] for key in ("input_ids", "attention_mask")},
    y=dict(irony_output=labels_irony, stance_output=labels_stance),
    validation_split=0.1,
    epochs=20,
    batch_size=64,
)


In [None]:

# Step 9: Evaluate the Model
# Each head is scored on the held-out test split of its own dataset, so the
# reported numbers are not measured on training data and every label is
# compared with a prediction made for the text that label belongs to.
def evaluate_head(dataset_split, head_index, batch_size=64):
    """Return the accuracy of one output head on a tokenized dataset split.

    Args:
        dataset_split: Tokenized split with ``"input_ids"``,
            ``"attention_mask"`` and ``"label"`` columns.
        head_index: Position of the head in the model outputs
            (0 = irony, 1 = stance, matching the ``outputs`` order of the model).
        batch_size: Batch size used for prediction.
    """
    rows = dataset_split[:]
    features = {
        "input_ids": np.array(rows["input_ids"], dtype=np.int32),
        "attention_mask": np.array(rows["attention_mask"], dtype=np.int32),
    }
    probabilities = multi_output_model.predict(features, batch_size=batch_size)[head_index]
    predicted = probabilities.argmax(axis=-1)
    return float(np.mean(predicted == np.array(rows["label"])))


results = {
    "irony_accuracy": evaluate_head(ds_irony["test"], head_index=0),
    "stance_accuracy": evaluate_head(ds_stance["test"], head_index=1),
}
print("Evaluation Results:", results)


In [None]:

# Step 10: Make Predictions
sample_texts = ["This weather is fantastic... not!", "Climate change is real, and we must act now."]
tokenized_inputs = tokenizer(sample_texts, padding="max_length", truncation=True, max_length=128, return_tensors="tf")

predictions = multi_output_model.predict({
    "input_ids": tokenized_inputs["input_ids"],
    "attention_mask": tokenized_inputs["attention_mask"]
})

# Take the stance class names from the dataset's own label feature so that the
# printed name always matches the integer id the model was trained on.
# Fallback order follows the TweetEval stance mapping (0=none, 1=against, 2=favor).
stance_names = getattr(ds_stance["train"].features["label"], "names", None) or ["none", "against", "favor"]

# The model returns one probability array per head, in output order.
irony_probs, stance_probs = predictions
for text, irony_row, stance_row in zip(sample_texts, irony_probs, stance_probs):
    irony_pred = irony_row.argmax()
    stance_pred = stance_row.argmax()
    print(f"Text: {text}")
    print(f"  Predicted Irony: {'Ironic' if irony_pred == 1 else 'Not Ironic'}")
    print(f"  Predicted Stance: {stance_names[stance_pred]}")
