# Attention network

## Libraries

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import XLMRobertaModel, XLMRobertaTokenizer
from sklearn.metrics import classification_report
import pandas as pd

## Dataset Class

In [3]:
class FauxHateDataset(Dataset):
    def __init__(self, data, tokenizer, max_len):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        text = row['Tweet']
        label_faux = row['Fake']
        label_hate = row['Hate']

        encoding = self.tokenizer.encode_plus(
            text,
            max_length=self.max_len,
            truncation=True,
            padding='max_length',
            add_special_tokens=True,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels_faux': torch.tensor(label_faux, dtype=torch.long),
            'labels_hate': torch.tensor(label_hate, dtype=torch.long)
        }

## Model architecture

In [4]:
class AttentionLayer(nn.Module):
    def __init__(self, hidden_dim):
        super(AttentionLayer, self).__init__()
        self.attn = nn.Linear(hidden_dim, 1)

    def forward(self, features):
        attn_weights = torch.softmax(self.attn(features), dim=1)
        weighted_representation = torch.sum(features * attn_weights, dim=1)
        return weighted_representation


class MultiTaskAttentionModel(nn.Module):
    def __init__(self, num_classes_fake=2, num_classes_hate=2, hidden_dim=768):
        super(MultiTaskAttentionModel, self).__init__()
        self.xlm_roberta = XLMRobertaModel.from_pretrained('xlm-roberta-base')


        self.fake_attention = AttentionLayer(hidden_dim)
        self.hate_attention = AttentionLayer(hidden_dim)


        self.fake_classifier = nn.Linear(hidden_dim, num_classes_fake)
        self.hate_classifier = nn.Linear(hidden_dim, num_classes_hate)

    def forward(self, input_ids, attention_mask):
        outputs = self.xlm_roberta(input_ids=input_ids, attention_mask=attention_mask)
        all_hidden_states = outputs.last_hidden_state

        fake_representation = self.fake_attention(all_hidden_states)
        hate_representation = self.hate_attention(all_hidden_states)

        fake_logits = self.fake_classifier(fake_representation)
        hate_logits = self.hate_classifier(hate_representation)

        return fake_logits, hate_logits

In [5]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, patience=3):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0

        for batch in train_loader:
            fake_labels = batch['labels_faux'].to(device)
            hate_labels = batch['labels_hate'].to(device)
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            optimizer.zero_grad()
            fake_logits, hate_logits = model(input_ids, attention_mask)

            fake_loss = criterion(fake_logits, fake_labels)
            hate_loss = criterion(hate_logits, hate_labels)
            loss = fake_loss + hate_loss

            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss /= len(train_loader)
        val_loss, val_fake_preds, val_hate_preds, val_fake_labels, val_hate_labels = evaluate_model(model, val_loader, criterion, device)

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
        print("Validation Fake Classification Report:")
        print(classification_report(val_fake_labels, val_fake_preds))
        print("Validation Hate Classification Report:")
        print(classification_report(val_hate_labels, val_hate_preds))
        print("--------------------------------------------------")


        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), 'best_attention_model_task_a.pth')  # Save the best model
            print("Model improved, saving current model.")
        else:
            patience_counter += 1
            print(f"No improvement. Early stopping counter: {patience_counter}/{patience}")

        if patience_counter >= patience:
            print("Early stopping triggered. Training terminated.")
            break

In [6]:
def evaluate_model(model, val_loader, criterion, device):
    model.eval()
    val_loss = 0.0
    val_fake_preds, val_hate_preds = [], []
    val_fake_labels, val_hate_labels = [], []

    with torch.no_grad():
        for batch in val_loader:
            fake_labels = batch['labels_faux'].to(device)
            hate_labels = batch['labels_hate'].to(device)
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            fake_logits, hate_logits = model(input_ids, attention_mask)
            fake_loss = criterion(fake_logits, fake_labels)
            hate_loss = criterion(hate_logits, hate_labels)
            loss = fake_loss + hate_loss
            val_loss += loss.item()

            val_fake_preds.extend(torch.argmax(fake_logits, dim=1).cpu().numpy())
            val_hate_preds.extend(torch.argmax(hate_logits, dim=1).cpu().numpy())
            val_fake_labels.extend(fake_labels.cpu().numpy())
            val_hate_labels.extend(hate_labels.cpu().numpy())

    val_loss /= len(val_loader)
    return val_loss, val_fake_preds, val_hate_preds, val_fake_labels, val_hate_labels

In [7]:
tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')
train_data = pd.read_csv("/content/cleaned_train.csv").dropna(subset=['Tweet'])
val_data = pd.read_csv("/content/cleaned_val.csv").dropna(subset=['Tweet'])
MAX_LEN = 180

train_dataset = FauxHateDataset(train_data, tokenizer, MAX_LEN)
val_dataset = FauxHateDataset(val_data, tokenizer, MAX_LEN)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)

model = MultiTaskAttentionModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-5)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

## Training the model

In [8]:
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=30, patience=3)

Epoch 1/30, Train Loss: 1.1120, Val Loss: 1.0674
Validation Fake Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.66      0.72       376
           1       0.73      0.84      0.78       423

    accuracy                           0.75       799
   macro avg       0.76      0.75      0.75       799
weighted avg       0.76      0.75      0.75       799

Validation Hate Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.52      0.59       286
           1       0.77      0.87      0.81       513

    accuracy                           0.74       799
   macro avg       0.73      0.69      0.70       799
weighted avg       0.74      0.74      0.73       799

--------------------------------------------------
Model improved, saving current model.
Epoch 2/30, Train Loss: 1.0104, Val Loss: 1.0042
Validation Fake Classification Report:
              precision    recall  f1-scor

KeyboardInterrupt: 

In [22]:
import tqdm

class FauxHateDataset(Dataset):
    def __init__(self, data, tokenizer, max_len):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        text = row['Tweet']

        # Check if 'label_faux' and 'label_hate' columns exist
        # If they don't exist, assume they are 0 for the test data
        label_faux = row.get('label_faux', 0)
        label_hate = row.get('label_hate', 0)

        # Tokenize the text
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # Return the tokenized input and labels
        return {
            'input_ids1': encoding['input_ids'].flatten(),
            'attention_mask1': encoding['attention_mask'].flatten(),
            'input_ids2': encoding['input_ids'].flatten(),  # You might want to change this for task B
            'attention_mask2': encoding['attention_mask'].flatten(),  # You might want to change this for task B
            'label_faux': torch.tensor(label_faux, dtype=torch.long),
            'label_hate': torch.tensor(label_hate, dtype=torch.long)
        }

In [26]:
import tqdm

def predict(model, dataloader):
    predictions = []
    with torch.no_grad():
        # Iterate using tqdm.tqdm to display a progress bar
        for batch in tqdm.tqdm(dataloader):
            input_ids1 = batch['input_ids1'].to(device)
            attention_mask1 = batch['attention_mask1'].to(device)
            # input_ids2 = batch['input_ids2'].to(device)  # These lines are removed or modified
            # attention_mask2 = batch['attention_mask2'].to(device)  # These lines are removed or modified

            # Forward pass - pass only the required arguments to the model's forward method
            fake_logits, hate_logits = model(input_ids1, attention_mask1)  # Pass only required arguments

            # Get the predicted labels
            pred_fake = torch.argmax(fake_logits, dim=1).cpu().numpy()
            pred_hate = torch.argmax(hate_logits, dim=1).cpu().numpy()

            predictions.extend(zip(pred_fake, pred_hate))

    return predictions

In [27]:
def load_model(model_path, tokenizer):
    model = MultiTaskAttentionModel()
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()  # Set to evaluation mode
    return model

In [28]:
import tqdm

model_path = '/content/best_attention_model_task_a.pth'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = load_model(model_path, tokenizer)

# Load the test data
test_data = pd.read_csv('/content/cleaned_test_a.csv')  # Assuming the test data CSV has 'id' and 'Tweet' columns

# Prepare the test dataset and DataLoader
test_dataset = FauxHateDataset(test_data, tokenizer, max_len=180)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Make predictions
predictions = predict(model, test_dataloader)

  model.load_state_dict(torch.load(model_path, map_location=device))
100%|██████████| 25/25 [00:08<00:00,  3.10it/s]


In [29]:
# Prepare the output DataFrame
output_df = pd.DataFrame(predictions, columns=['Fake', 'Hate'])
output_df['Id'] = test_data['Id']
output_df = output_df[['Id', 'Hate', 'Fake']]  # Reorder columns as required

# Save the predictions to a CSV file
output_df.to_csv('predictions_attention_task_a.csv', index=False)

print("Predictions saved to 'predictions.csv'")

Predictions saved to 'predictions.csv'
