### Counter Speech Generation with GPT-2, EAR, and KLAR Regularization with LOTO Evaluation

In [1]:
import pandas as pd
# Load additional hate speech and counter-narrative dataset
cn_data = pd.read_csv('MT-CONAN.csv')

In [2]:
cn_data.head()

Unnamed: 0,INDEX,HATE_SPEECH,COUNTER_NARRATIVE,TARGET,VERSION
0,0,Maybe the UN could talk to those asian and afr...,"You're right, ocean pollution is one of the ma...",other,V1
1,1,‘Juice (Jews) are worse that nukes.’ Caption t...,You'd rather be nuked then?,JEWS,V1
2,2,Foreigners on UK benefits leaps 41% in 5 years...,You seem to be worried about government expend...,MIGRANTS,V1
3,3,"Jews cannot be patriots, since their allegianc...",You can have parents and grandparents born els...,JEWS,V1
4,4,"Israel, Zionism and Jewish Supremacist control...",Wouldn't you say its colonialism from many dif...,JEWS,V1


In [3]:
cn_data.describe()

Unnamed: 0,INDEX
count,5003.0
mean,2501.0
std,1444.386029
min,0.0
25%,1250.5
50%,2501.0
75%,3751.5
max,5002.0


In [4]:
import pandas as pd

# Load the data
cn_data = pd.read_csv('MT-CONAN.csv')

# Get unique groups
unique_groups = cn_data['TARGET'].unique()

# Display unique groups
print(f'The unique groups in the TARGET column are:\n{unique_groups}')


The unique groups in the TARGET column are:
['other' 'JEWS' 'MIGRANTS' 'WOMEN' 'POC' 'LGBT+' 'MUSLIMS' 'DISABLED']


### Model Training Phase

In [5]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, GPT2Config
from sklearn.model_selection import LeaveOneGroupOut
import torch.nn.functional as F
import torch.nn as nn  

# Load the data
cn_data = pd.read_csv('MT-CONAN.csv')

# Preprocess the data
cn_data.fillna("", inplace=True)
texts = cn_data['HATE_SPEECH'].tolist()
targets = cn_data['TARGET'].tolist()
counter_texts = cn_data['COUNTER_NARRATIVE'].tolist()

# Tokenization
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

# Add special tokens
special_tokens = {'additional_special_tokens': ['<hatespeech>', '<counternarrative>']}
tokenizer.add_special_tokens(special_tokens)

class CounterSpeechDataset(Dataset):
    def __init__(self, texts, counter_texts):
        self.texts = texts
        self.counter_texts = counter_texts

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        input_text = f"<hatespeech> {self.texts[idx]} <counternarrative>"
        target_text = self.counter_texts[idx]

        encodings = tokenizer(input_text, truncation=True, padding='max_length', max_length=128, return_tensors="pt")
        counter_encodings = tokenizer(target_text, truncation=True, padding='max_length', max_length=128, return_tensors="pt")
        
        item = {key: val.squeeze() for key, val in encodings.items()}
        item['labels'] = counter_encodings['input_ids'].squeeze()
        return item

# Define the model with regularization and dropout
class GPT2WithRegularization(GPT2LMHeadModel):
    def __init__(self, config, dropout_rate=0.5):
        super().__init__(config)
        self.ear_weight = 0.1
        self.klar_weight = 0.1
        self.dropout = nn.Dropout(dropout_rate)  # Adding dropout layer

    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None,
                inputs_embeds=None, labels=None, output_attentions=None, output_hidden_states=None, return_dict=None):
        outputs = self.transformer(input_ids,
                                   attention_mask=attention_mask,
                                   token_type_ids=token_type_ids,
                                   position_ids=position_ids,
                                   head_mask=head_mask,
                                   inputs_embeds=inputs_embeds,
                                   output_attentions=output_attentions,
                                   output_hidden_states=output_hidden_states,
                                   return_dict=return_dict)
        
        hidden_states = outputs[0]
        hidden_states = self.dropout(hidden_states)  # Apply dropout to hidden states
        lm_logits = self.lm_head(hidden_states)

        loss = None
        if labels is not None:
            shift_logits = lm_logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

            # EAR
            entropy = -torch.sum(F.log_softmax(lm_logits, dim=-1) * F.softmax(lm_logits, dim=-1), dim=-1).mean()
            loss += self.ear_weight * entropy

            # KLAR
            attentions = outputs.attentions
            if attentions is not None:
                for attention in attentions:
                    attention = attention.mean(dim=1)
                    target_attention = torch.eye(attention.size(-1)).to(attention.device)
                    klar = F.kl_div(attention, target_attention, reduction='batchmean')
                    loss += self.klar_weight * klar

        if not return_dict:
            output = (lm_logits,) + outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return {"loss": loss, "logits": lm_logits, "hidden_states": outputs.hidden_states, "attentions": outputs.attentions}

# Enable attention outputs in the model config
config = GPT2Config.from_pretrained('gpt2')
config.output_attentions = True
model = GPT2WithRegularization(config)

# Update model config for additional special tokens
model.resize_token_embeddings(len(tokenizer))

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,
    warmup_ratio=0.1,  # Using warmup ratio instead of warmup steps
    learning_rate=5e-5,  # Set learning rate to 5e-5
    weight_decay=0.01,
    logging_dir='./logs',
    evaluation_strategy="epoch",
    save_total_limit=1,
    fp16=False,  # Disable FP16 as we are not using CUDA/GPU
)

# LOTO Evaluation
logo = LeaveOneGroupOut()
for train_idx, val_idx in logo.split(texts, counter_texts, groups=targets):
    train_texts = [texts[i] for i in train_idx]
    val_texts = [texts[i] for i in val_idx]
    train_counter_texts = [counter_texts[i] for i in train_idx]
    val_counter_texts = [counter_texts[i] for i in val_idx]

    train_dataset = CounterSpeechDataset(train_texts, train_counter_texts)
    val_dataset = CounterSpeechDataset(val_texts, val_counter_texts)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
    )

    trainer.train()




Epoch,Training Loss,Validation Loss
1,-14.1025,-19.272741
2,-18.4323,-19.792513
3,-18.7758,-19.9547




Epoch,Training Loss,Validation Loss
1,-19.2409,-18.852114
2,-19.4012,-18.972481
3,-19.7389,-19.00869




Epoch,Training Loss,Validation Loss
1,-19.5704,-19.938175
2,-19.4752,-19.96138
3,-19.8599,-19.99535




Epoch,Training Loss,Validation Loss
1,-20.1325,-18.019915
2,-20.2126,-18.027205
3,-20.1857,-18.108946




Epoch,Training Loss,Validation Loss
1,No log,-19.465967
2,-19.985300,-19.466116
3,-19.943400,-19.46104




Epoch,Training Loss,Validation Loss
1,-19.7486,-20.621311
2,-19.9406,-20.648624
3,-19.8353,-20.664507




Epoch,Training Loss,Validation Loss
1,-19.4926,-22.274693
2,-19.7118,-22.274582
3,-19.6428,-22.294003




Epoch,Training Loss,Validation Loss
1,-19.9159,-22.436157
2,-19.8433,-22.43272
3,-19.5547,-22.450878


### Inference Phase

In [4]:
import torch
from transformers import GPT2Tokenizer, GPT2Config, GPT2LMHeadModel
import torch.nn.functional as F
import torch.nn as nn

# Define the custom GPT2 model with regularization and dropout
class GPT2WithRegularization(GPT2LMHeadModel):
    def __init__(self, config, dropout_rate=0.5):
        super().__init__(config)
        self.dropout = nn.Dropout(dropout_rate)  # Adding dropout layer

    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None,
                inputs_embeds=None, labels=None, output_attentions=None, output_hidden_states=None, return_dict=None):
        outputs = self.transformer(input_ids,
                                   attention_mask=attention_mask,
                                   token_type_ids=token_type_ids,
                                   position_ids=position_ids,
                                   head_mask=head_mask,
                                   inputs_embeds=inputs_embeds,
                                   output_attentions=output_attentions,
                                   output_hidden_states=output_hidden_states,
                                   return_dict=return_dict)
        
        hidden_states = outputs[0]
        hidden_states = self.dropout(hidden_states)  # Apply dropout to hidden states
        lm_logits = self.lm_head(hidden_states)

        loss = None
        if labels is not None:
            shift_logits = lm_logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        if not return_dict:
            output = (lm_logits,) + outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return {"loss": loss, "logits": lm_logits, "hidden_states": outputs.hidden_states, "attentions": outputs.attentions}

# Load the model configuration with attention outputs enabled
config = GPT2Config.from_pretrained('./results', output_attentions=True)
model = GPT2WithRegularization.from_pretrained('./results', config=config)

# Load the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

# Add the special tokens
special_tokens = {'additional_special_tokens': ['<hatespeech>', '<counternarrative>']}
tokenizer.add_special_tokens(special_tokens)
model.resize_token_embeddings(len(tokenizer))

# Inference functions remain the same
def generate_beam_search(prompt, model, tokenizer, num_beams=3, max_length=50):
    input_text = f"<hatespeech> {prompt} <counternarrative>"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')
    beam_output = model.generate(
        input_ids,
        max_length=max_length,
        num_beams=num_beams,
        early_stopping=True
    )
    return tokenizer.decode(beam_output[0], skip_special_tokens=True)

def generate_top_k_sampling(prompt, model, tokenizer, top_k=50, max_length=50):
    input_text = f"<hatespeech> {prompt} <counternarrative>"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')
    top_k_output = model.generate(
        input_ids,
        max_length=max_length,
        do_sample=True,
        top_k=top_k
    )
    return tokenizer.decode(top_k_output[0], skip_special_tokens=True)

def generate_top_p_sampling(prompt, model, tokenizer, top_p=0.9, max_length=50):
    input_text = f"<hatespeech> {prompt} <counternarrative>"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')
    top_p_output = model.generate(
        input_ids,
        max_length=max_length,
        do_sample=True,
        top_p=top_p
    )
    return tokenizer.decode(top_p_output[0], skip_special_tokens=True)

def generate_top_k_top_p_sampling(prompt, model, tokenizer, top_k=50, top_p=0.9, max_length=50):
    input_text = f"<hatespeech> {prompt} <counternarrative>"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')
    top_k_top_p_output = model.generate(
        input_ids,
        max_length=max_length,
        do_sample=True,
        top_k=top_k,
        top_p=top_p
    )
    return tokenizer.decode(top_k_top_p_output[0], skip_special_tokens=True)

# Example Usage
prompt = "I hate this!"
print("Beam Search Output:", generate_beam_search(prompt, model, tokenizer))
print("Top-k Sampling Output:", generate_top_k_sampling(prompt, model, tokenizer))
print("Top-p Sampling Output:", generate_top_p_sampling(prompt, model, tokenizer))
print("Top-k and Top-p Sampling Output:", generate_top_k_top_p_sampling(prompt, model, tokenizer))


OSError: ./results does not appear to have a file named config.json. Checkout 'https://huggingface.co/./results/None' for available files.