In [1]:
import pandas as pd

In [2]:
splits = {'train': 'data/train-00000-of-00001.parquet', 'test': 'data/test-00000-of-00001.parquet'}


In [3]:
def get_cleaned_df(split: str) -> pd.DataFrame:
  df = pd.read_parquet("hf://datasets/ailsntua/QEvasion/" + splits["train"])
  df.drop(columns=['title', 'date', 'url'], axis=1, inplace=True)
  df.drop(columns=['annotator1', 'annotator2', 'annotator3', 'annotator_id', 'gpt3.5_summary', 'gpt3.5_prediction'], axis=1, inplace=True)
  conversion_dict = {
    'president': 'category',
    'clarity_label': 'category',
    'evasion_label': 'category',
    'question_order': 'int8',
    'index': 'int16',
    'interview_question': 'string',
    'interview_answer': 'string',
    'question': 'string',
  }

  df = df.astype(conversion_dict)
  return df

In [4]:
train_df = get_cleaned_df('train')
test_df = get_cleaned_df('test')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [5]:
train_df.sample(3)

Unnamed: 0,president,question_order,interview_question,interview_answer,question,inaudible,multiple_questions,affirmative_questions,index,clarity_label,evasion_label
1675,Donald J. Trump,1,Q. [Inaudible]—Sankei Newspaper Company. I hav...,"Well, I think it's very sad. I look at what's ...","To North Korea, is there a possibility of U.S....",False,False,False,1675,Ambivalent,Dodging
1215,Donald J. Trump,18,Q. Has Netanyahu made concessions?,"Yes, go ahead. Please. Sir./Japan-U.S. Trade",Whether Netanyahu has made concessions.,False,False,True,1215,Ambivalent,Dodging
521,Donald J. Trump,7,"Q. Eighty days before an election, sir? Is thi...","Well, wait a minute. You just threw—look, I ju...",Is this the right time?,False,False,False,521,Ambivalent,Implicit


In [6]:
train_df['text'] = train_df['interview_question'] + ' [SEP] ' + train_df['interview_answer']
test_df['text'] = test_df['interview_question'] + ' [SEP] ' + test_df['interview_answer']


train_df['text'].sample(2).tolist()

["Q. Okay. And do you bear responsibility for everything that's happening at the border now? I hear you talking a lot about the past administration. You decided to roll back some of those policies, did you move too quickly to roll back—[inaudible]—policies? [SEP] To roll back what? I'm sorry.",
 "Q. You're about to embark on your final foreign trip. What will you say to other world leaders about your successor? They've expressed many of the same misgivings that you have about Donald Trump. Should they be worried about the future of U.S. foreign policy? And separately, as Democrats scramble to regroup after a pretty shocking upset, what is your advice about where the party goes now? And who should lead your party? [SEP] Good. One of the great things about the United States is that when it comes to world affairs, the President obviously is the leader of the executive branch, the Commander in Chief, the spokesperson for the Nation, but the influence and the work that we have is the result

In [7]:
from sklearn.preprocessing import LabelEncoder

# Initialize LabelEncoders for clarity_label and evasion_label
clarity_encoder = LabelEncoder()
evasion_encoder = LabelEncoder()

# Fit and transform clarity_label for both dataframes
train_df['clarity_label_encoded'] = clarity_encoder.fit_transform(train_df['clarity_label'])
test_df['clarity_label_encoded'] = clarity_encoder.transform(test_df['clarity_label'])

# Fit and transform evasion_label for both dataframes
train_df['evasion_label_encoded'] = evasion_encoder.fit_transform(train_df['evasion_label'])
test_df['evasion_label_encoded'] = evasion_encoder.transform(test_df['evasion_label'])


train_df[['clarity_label', 'clarity_label_encoded', 'evasion_label', 'evasion_label_encoded']].sample(3)

Unnamed: 0,clarity_label,clarity_label_encoded,evasion_label,evasion_label_encoded
3261,Ambivalent,0,Implicit,7
1618,Ambivalent,0,General,6
145,Ambivalent,0,General,6


In [8]:
from transformers import AutoTokenizer

# Initialize a tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

print(f"Tokenizer initialized: {tokenizer.name_or_path}")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Tokenizer initialized: bert-base-uncased


In [9]:
train_tokenized = tokenizer(train_df['text'].tolist(), truncation=True, padding=True)
test_tokenized = tokenizer(test_df['text'].tolist(), truncation=True, padding=True)


In [10]:
!pip install datasets



In [11]:
import torch
from datasets import Dataset

In [12]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, clarity_labels, evasion_labels):
        self.encodings = encodings
        self.clarity_labels = clarity_labels
        self.evasion_labels = evasion_labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['clarity_labels'] = torch.tensor(self.clarity_labels[idx], dtype=torch.long)
        item['evasion_labels'] = torch.tensor(self.evasion_labels[idx], dtype=torch.long)
        return item

    def __len__(self):
        return len(self.clarity_labels)

train_dataset = CustomDataset(
    train_tokenized,
    train_df['clarity_label_encoded'].tolist(),
    train_df['evasion_label_encoded'].tolist()
)

test_dataset = CustomDataset(
    test_tokenized,
    test_df['clarity_label_encoded'].tolist(),
    test_df['evasion_label_encoded'].tolist()
)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

Train dataset size: 3448
Test dataset size: 3448


In [13]:
from transformers import AutoModel

# Load a pre-trained model
model_name = 'bert-base-uncased'
model = AutoModel.from_pretrained(model_name)

# Determine the number of unique labels for each task
num_clarity_labels = train_df['clarity_label_encoded'].nunique()
num_evasion_labels = train_df['evasion_label_encoded'].nunique()

print(f"Pre-trained model '{model_name}' loaded successfully.")
print(f"Number of unique clarity labels: {num_clarity_labels}")
print(f"Number of unique evasion labels: {num_evasion_labels}")

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Pre-trained model 'bert-base-uncased' loaded successfully.
Number of unique clarity labels: 3
Number of unique evasion labels: 9


In [14]:
import torch.nn as nn

class MultiTaskModel(nn.Module):
    def __init__(self, base_model, num_clarity_labels, num_evasion_labels):
        super().__init__()
        self.base_model = base_model
        hidden_size = self.base_model.config.hidden_size # Get hidden size from base model config

        self.clarity_classifier = nn.Linear(hidden_size, num_clarity_labels)
        self.evasion_classifier = nn.Linear(hidden_size, num_evasion_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        # Extract the hidden state of the first token ([CLS] token)
        cls_embedding = outputs.last_hidden_state[:, 0]

        clarity_logits = self.clarity_classifier(cls_embedding)
        evasion_logits = self.evasion_classifier(cls_embedding)

        return clarity_logits, evasion_logits


In [15]:
model = MultiTaskModel(model, num_clarity_labels, num_evasion_labels)

# Define the optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

# Define loss functions for each task
clarity_loss_fn = nn.CrossEntropyLoss()
evasion_loss_fn = nn.CrossEntropyLoss()

# Create DataLoader objects
from torch.utils.data import DataLoader

batch_size = 16
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

print(f"Train DataLoader has {len(train_dataloader)} batches.")
print(f"Test DataLoader has {len(test_dataloader)} batches.")

Train DataLoader has 216 batches.
Test DataLoader has 216 batches.


In [16]:
# device = torch.device('cpu') # Temporarily force to CPU for debugging
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') # Original line
model.to(device)

epochs = 3 # Define the number of epochs

print(f"Starting training on device: {device}")

for epoch in range(epochs):
    model.train() # Set model to training mode
    total_train_loss = 0
    for batch_idx, batch in enumerate(train_dataloader):
        # Move tensors to the appropriate device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        clarity_labels = batch['clarity_labels'].to(device)
        evasion_labels = batch['evasion_labels'].to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        clarity_logits, evasion_logits = model(input_ids=input_ids, attention_mask=attention_mask)

        # Calculate loss for each task
        clarity_loss = clarity_loss_fn(clarity_logits, clarity_labels)
        evasion_loss = evasion_loss_fn(evasion_logits, evasion_labels)

        # Combine losses
        total_loss = clarity_loss + evasion_loss

        # Backward pass and optimize
        total_loss.backward()
        optimizer.step()

        total_train_loss += total_loss.item()

        if (batch_idx + 1) % 50 == 0: # Print training loss periodically
            print(f"Epoch {epoch+1}, Batch {batch_idx+1}/{len(train_dataloader)}, Loss: {total_loss.item():.4f}")

    avg_train_loss = total_train_loss / len(train_dataloader)
    print(f"Epoch {epoch+1} finished. Average training loss: {avg_train_loss:.4f}")

print("Training complete.")

Starting training on device: cuda
Epoch 1, Batch 50/216, Loss: 2.7811
Epoch 1, Batch 100/216, Loss: 2.9734
Epoch 1, Batch 150/216, Loss: 2.5728
Epoch 1, Batch 200/216, Loss: 3.0474
Epoch 1 finished. Average training loss: 2.6927
Epoch 2, Batch 50/216, Loss: 2.0259
Epoch 2, Batch 100/216, Loss: 2.9835
Epoch 2, Batch 150/216, Loss: 2.1049
Epoch 2, Batch 200/216, Loss: 2.4455
Epoch 2 finished. Average training loss: 2.4212
Epoch 3, Batch 50/216, Loss: 2.1670
Epoch 3, Batch 100/216, Loss: 2.0448
Epoch 3, Batch 150/216, Loss: 1.5135
Epoch 3, Batch 200/216, Loss: 1.9667
Epoch 3 finished. Average training loss: 2.1110
Training complete.


In [17]:
import random

# Select a random index from the training dataset
random_idx = random.randint(0, len(train_df) - 1)

# Get the sample from the train_df
sample_row = train_df.iloc[random_idx]

# Extract the original text and labels
input_text = sample_row['text']
true_clarity_label_encoded = sample_row['clarity_label_encoded']
true_evasion_label_encoded = sample_row['evasion_label_encoded']

true_clarity_label = clarity_encoder.inverse_transform([true_clarity_label_encoded])[0]
true_evasion_label = evasion_encoder.inverse_transform([true_evasion_label_encoded])[0]

# Tokenize the input text
inputs = tokenizer(input_text, return_tensors='pt', truncation=True, padding=True)

# Move inputs to the device
input_ids = inputs['input_ids'].to(device)
attention_mask = inputs['attention_mask'].to(device)

# Set model to evaluation mode
model.eval()

# Get predictions
with torch.no_grad():
    clarity_logits, evasion_logits = model(input_ids=input_ids, attention_mask=attention_mask)

# Get predicted labels (indices)
predicted_clarity_label_encoded = torch.argmax(clarity_logits, dim=-1).item()
predicted_evasion_label_encoded = torch.argmax(evasion_logits, dim=-1).item()

# Decode predicted labels
predicted_clarity_label = clarity_encoder.inverse_transform([predicted_clarity_label_encoded])[0]
predicted_evasion_label = evasion_encoder.inverse_transform([predicted_evasion_label_encoded])[0]

print(f"Input Text: {input_text}")
print("\n--- True Labels ---")
print(f"Clarity Label: {true_clarity_label}")
print(f"Evasion Label: {true_evasion_label}")

print("\n--- Predicted Labels ---")
print(f"Predicted Clarity Label: {predicted_clarity_label}")
print(f"Predicted Evasion Label: {predicted_evasion_label}")


Input Text: Q. Thank you, Mr. President. I just want to come back to the debt ceiling, because in the summer of 2011, you said that you wouldn't negotiate on the debt ceiling, and you did. Last year, you said that you wouldn't extend any of the Bush tax cuts for the wealthy, and you did. So as you say now that you're not going to negotiate on the debt ceiling this year, why should House Republicans take that seriously and think that if we get to the 1-minute-to-midnight scenario, that you're not going to back down? [SEP] Well, first of all, Julianna, let's take the example of this year and the fiscal cliff. I didn't say that I would not have any conversations at all about extending the Bush tax cuts. What I said was we weren't going to extend Bush tax cuts for the wealthy, and we didn't. Now, you can argue that during the campaign, I saidI set the criteria for wealthy at 250, and we ended up being at 400. But the fact of the matter is millionaires, billionaires, are paying significantl

# Evaluation


In [18]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [19]:
model.eval() # Set model to evaluation mode

# Initialize lists to store true labels and predictions
all_clarity_labels = []
all_clarity_preds = []
all_evasion_labels = []
all_evasion_preds = []

print("Starting evaluation on the test dataset...")

with torch.no_grad(): # Disable gradient calculation for evaluation
    for batch_idx, batch in enumerate(test_dataloader):
        # Move tensors to the appropriate device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        clarity_labels = batch['clarity_labels'].to(device)
        evasion_labels = batch['evasion_labels'].to(device)

        # Forward pass
        clarity_logits, evasion_logits = model(input_ids=input_ids, attention_mask=attention_mask)

        # Get predicted labels
        clarity_preds = torch.argmax(clarity_logits, dim=-1)
        evasion_preds = torch.argmax(evasion_logits, dim=-1)

        # Append true labels and predictions
        all_clarity_labels.extend(clarity_labels.cpu().tolist())
        all_clarity_preds.extend(clarity_preds.cpu().tolist())
        all_evasion_labels.extend(evasion_labels.cpu().tolist())
        all_evasion_preds.extend(evasion_preds.cpu().tolist())

# Convert lists to numpy arrays for metric calculation
import numpy as np
all_clarity_labels = np.array(all_clarity_labels)
all_clarity_preds = np.array(all_clarity_preds)
all_evasion_labels = np.array(all_evasion_labels)
all_evasion_preds = np.array(all_evasion_preds)

# Calculate and print metrics for clarity_label
print("\n--- Clarity Label Evaluation ---")
clarity_accuracy = accuracy_score(all_clarity_labels, all_clarity_preds)
clarity_precision, clarity_recall, clarity_f1, _ = precision_recall_fscore_support(
    all_clarity_labels, all_clarity_preds, average='weighted', zero_division=0
)
print(f"Accuracy: {clarity_accuracy:.4f}")
print(f"Precision (weighted): {clarity_precision:.4f}")
print(f"Recall (weighted): {clarity_recall:.4f}")
print(f"F1-Score (weighted): {clarity_f1:.4f}")

# Calculate and print metrics for evasion_label
print("\n--- Evasion Label Evaluation ---")
evasion_accuracy = accuracy_score(all_evasion_labels, all_evasion_preds)
evasion_precision, evasion_recall, evasion_f1, _ = precision_recall_fscore_support(
    all_evasion_labels, all_evasion_preds, average='weighted', zero_division=0
)
print(f"Accuracy: {evasion_accuracy:.4f}")
print(f"Precision (weighted): {evasion_precision:.4f}")
print(f"Recall (weighted): {evasion_recall:.4f}")
print(f"F1-Score (weighted): {evasion_f1:.4f}")

print("\nEvaluation complete for both tasks.")

Starting evaluation on the test dataset...

--- Clarity Label Evaluation ---
Accuracy: 0.7926
Precision (weighted): 0.7939
Recall (weighted): 0.7926
F1-Score (weighted): 0.7930

--- Evasion Label Evaluation ---
Accuracy: 0.5267
Precision (weighted): 0.5947
Recall (weighted): 0.5267
F1-Score (weighted): 0.4319

Evaluation complete for both tasks.
