# SemEval-2024

# Scorer from organizers

In [4]:
import pdb
import json
import logging.handlers
import argparse
import os
import numpy as np
from sklearn.metrics import f1_score
from sklearn.preprocessing import MultiLabelBinarizer
from networkx import DiGraph, relabel_nodes, all_pairs_shortest_path_length
from sklearn_hierarchical_classification.constants import ROOT
from sklearn_hierarchical_classification.metrics import h_fbeta_score, h_recall_score, h_precision_score, \
    fill_ancestors, multi_labeled
import sys
sys.path.append('.')


KEYS = ['id','labels']
logger = logging.getLogger("subtask_1_2a_scorer")
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
logger.setLevel(logging.INFO)
#logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)

G = DiGraph()
G.add_edge(ROOT, "Logos")
G.add_edge("Logos", "Repetition")
G.add_edge("Logos", "Obfuscation, Intentional vagueness, Confusion")
G.add_edge("Logos", "Reasoning")
G.add_edge("Logos", "Justification")
G.add_edge('Justification', "Slogans")
G.add_edge('Justification', "Bandwagon")
G.add_edge('Justification', "Appeal to authority")
G.add_edge('Justification', "Flag-waving")
G.add_edge('Justification', "Appeal to fear/prejudice")
G.add_edge('Reasoning', "Simplification")
G.add_edge('Simplification', "Causal Oversimplification")
G.add_edge('Simplification', "Black-and-white Fallacy/Dictatorship")
G.add_edge('Simplification', "Thought-terminating cliché")
G.add_edge('Reasoning', "Distraction")
G.add_edge('Distraction', "Misrepresentation of Someone's Position (Straw Man)")
G.add_edge('Distraction', "Presenting Irrelevant Data (Red Herring)")
G.add_edge('Distraction', "Whataboutism")
G.add_edge(ROOT, "Ethos")
G.add_edge('Ethos', "Appeal to authority")
G.add_edge('Ethos', "Glittering generalities (Virtue)")
G.add_edge('Ethos', "Bandwagon")
G.add_edge('Ethos', "Ad Hominem")
G.add_edge('Ethos', "Transfer")
G.add_edge('Ad Hominem', "Doubt")
G.add_edge('Ad Hominem', "Name calling/Labeling")
G.add_edge('Ad Hominem', "Smears")
G.add_edge('Ad Hominem', "Reductio ad hitlerum")
G.add_edge('Ad Hominem', "Whataboutism")
G.add_edge(ROOT, "Pathos")
G.add_edge('Pathos', "Exaggeration/Minimisation")
G.add_edge('Pathos', "Loaded Language")
G.add_edge('Pathos', "Appeal to (Strong) Emotions")
G.add_edge('Pathos', "Appeal to fear/prejudice")
G.add_edge('Pathos', "Flag-waving")
G.add_edge('Pathos', "Transfer") 

def get_all_classes_from_graph(graph):
    return [
        node
        for node in graph.nodes
        if node != ROOT
        ]
    
def _h_fbeta_score(y_true, y_pred, class_hierarchy, beta=1., root=ROOT):
    hP = _h_precision_score(y_true, y_pred, class_hierarchy, root=root)
    hR = _h_recall_score(y_true, y_pred, class_hierarchy, root=root)
    if hP ==hR==0:
        return (0)
    return (1. + beta ** 2.) * hP * hR / (beta ** 2. * hP + hR)
    
def _fill_ancestors(y, graph, root, copy=True):
    y_ = y.copy() if copy else y
    paths = all_pairs_shortest_path_length(graph.reverse(copy=False))
    for target, distances in paths:
        if target == root:
            continue
        ix_rows = np.where(y[:, target] > 0)[0]
        ancestors = list(filter(lambda x: x != ROOT,distances.keys()))
        y_[tuple(np.meshgrid(ix_rows, ancestors))] = 1
    graph.reverse(copy=False)
    return y_
def _h_recall_score(y_true, y_pred, class_hierarchy, root=ROOT):
    y_true_ = _fill_ancestors(y_true, graph=class_hierarchy, root=root)
    y_pred_ = _fill_ancestors(y_pred, graph=class_hierarchy, root=root)

    ix = np.where((y_true_ != 0) & (y_pred_ != 0))

    true_positives = len(ix[0])
    all_positives = np.count_nonzero(y_true_)
    if all_positives==0:
        return 0.000000001

    
    return true_positives / all_positives

def _h_precision_score(y_true, y_pred, class_hierarchy, root=ROOT):
    y_true_ = _fill_ancestors(y_true, graph=class_hierarchy, root=root)
    y_pred_ = _fill_ancestors(y_pred, graph=class_hierarchy, root=root)

    ix = np.where((y_true_ != 0) & (y_pred_ != 0))

    true_positives = len(ix[0])
    all_results = np.count_nonzero(y_pred_)
    if all_results==0:
        return 0.000000001
    
    return true_positives / all_results
def read_classes(file_path):
  CLASSES = []
  with open(file_path) as f:
    for label in f.readlines():
      label = label.strip()
      if label:
        CLASSES.append(label)
  return CLASSES

def check_format(file_path):
  _classes = get_all_classes_from_graph(G)
  if not os.path.exists(file_path):
    logging.error("File doesnt exists: {}".format(file_path))
    return False
  submmission = ''
  try:
    with open(file_path, encoding='utf-8') as p:
      submission = json.load(p)
  except:
    logging.error("File is not a valid json file: {}".format(file_path))
    return False
  for i, obj in enumerate(submission):
    for key in KEYS:
      if key not in obj:
        logging.error("Missing entry in {}:{}".format(file_path, i))
        return False
  for label in list(obj['labels']):
       if label not in _classes:
         print(label)
         logging.error("Unknown Label in {}:{}".format(file_path, i))
         return False
  return True

def _read_gold_and_pred(pred_fpath, gold_fpath):
  """
  Read gold and predicted data.
  :param pred_fpath: a json file with predictions, 
  :param gold_fpath: the original annotated gold file.
  :return: {id:pred_labels} dict; {id:gold_labels} dict
  """

  gold_labels = {}
  with open(gold_fpath, encoding='utf-8') as gold_f:
    gold = json.load(gold_f)
    for obj in gold:
      gold_labels[obj['id']] = obj['labels']

  pred_labels = {}
  with open(pred_fpath, encoding='utf-8') as pred_f:
    pred = json.load(pred_f)
    for obj in pred:
      pred_labels[obj['id']] = obj['labels']

  if set(gold_labels.keys()) != set(pred_labels.keys()):
      logger.error('There are either missing or added examples to the prediction file. Make sure you only have the gold examples in the prediction file.')
      raise ValueError('There are either missing or added examples to the prediction file. Make sure you only have the gold examples in the prediction file.')
  
  return pred_labels, gold_labels

def evaluate_h(pred_labels, gold_labels):
    #pred_labels, gold_labels = _read_gold_and_pred(pred_file, gold_file)
  
    gold = []
    pred = []
    for id in gold_labels:
        gold.append(gold_labels[id])
        pred.append(pred_labels[id])
    with multi_labeled(gold, pred, G) as (gold_, pred_, graph_):
        return  _h_precision_score(gold_, pred_,graph_), _h_recall_score(gold_, pred_,graph_), _h_fbeta_score(gold_, pred_,graph_)

# Imports 

In [5]:
import numpy as np
import pandas as pd
from collections import deque
import random
import copy
import json

import torch
import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AutoTokenizer, BertModel, BertForSequenceClassification, RobertaForSequenceClassification,  AlbertForSequenceClassification
from torch.optim.lr_scheduler import ReduceLROnPlateau, LambdaLR
from torch.optim import AdamW

from tqdm.notebook import tqdm, trange

random_seed = 0
torch.manual_seed(random_seed)
random.seed(random_seed)
np.random.seed(random_seed)

In [6]:
TECHNIQUES_RAW = """Repetition
Obfuscation, Intentional vagueness, Confusion
Slogans
Bandwagon
Appeal to authority
Flag-waving
Appeal to fear/prejudice
Causal Oversimplification
Black-and-white Fallacy/Dictatorship
Thought-terminating cliché
Misrepresentation of Someone's Position (Straw Man)
Presenting Irrelevant Data (Red Herring)
Whataboutism
Glittering generalities (Virtue)
Doubt
Name calling/Labeling
Smears
Reductio ad hitlerum
Exaggeration/Minimisation
Loaded Language"""

TECHNIQUES_OTHER = """
Logos
Reasoning
Justification
Simplification
Distraction
Ethos
Ad Hominem
Pathos"""


TECHNIQUES_RAW = TECHNIQUES_RAW+TECHNIQUES_OTHER


#TECHNIQUES_RAW = """Presenting Irrelevant Data (Red Herring)"""
TECHNIQUES = TECHNIQUES_RAW.split('\n')
#TECHNIQUES = ['Logos', 'Pathos', 'Ethos']

In [7]:
LOGOS_all = ["Repetition", "Obfuscation, Intentional vagueness, Confusion", "Reasoning", "Justification"
        ,"Slogans", "Bandwagon", "Appeal to authority", "Flag-waving", "Appeal to fear/prejudice"
        ,"Simplification", "Causal Oversimplification", "Black-and-white Fallacy/Dictatorship", "Thought-terminating cliché"
        ,"Distraction","Misrepresentation of Someone's Position (Straw Man)", "Presenting Irrelevant Data (Red Herring)", "Whataboutism"]
ETHOS_all = ["Appeal to authority", "Glittering generalities (Virtue)", "Bandwagon", "Ad Hominem", "Transfer"
        ,"Doubt", "Name calling/Labeling", "Smears", "Reductio ad hitlerum", "Whataboutism"]
PATHOS_all = ["Exaggeration/Minimisation", "Loaded Language", "Appeal to (Strong) Emotions", "Appeal to fear/prejudice", "Flag-waving", "Transfer"]
JUSTIFICATION_all = ["Slogans", "Bandwagon", "Appeal to authority", "Flag-waving", "Appeal to fear/prejudice"]
REASONING_all = ["Simplification", "Distraction" , "Causal Oversimplification", "Black-and-white Fallacy/Dictatorship", "Thought-terminating cliché"
                ,"Misrepresentation of Someone's Position (Straw Man)", "Presenting Irrelevant Data (Red Herring)", "Whataboutism"]
SIMPLIFICATION_all = ["Causal Oversimplification", "Black-and-white Fallacy/Dictatorship", "Thought-terminating cliché"]
DISTRACTION_all = ["Misrepresentation of Someone's Position (Straw Man)", "Presenting Irrelevant Data (Red Herring)", "Whataboutism"]
ADHOMINEM_all = ["Doubt", "Name calling/Labeling", "Smears", "Reductio ad hitlerum", "Whataboutism"]

SUPERCLASSES_all = [LOGOS_all, ETHOS_all, PATHOS_all, JUSTIFICATION_all, REASONING_all, SIMPLIFICATION_all, DISTRACTION_all, ADHOMINEM_all]


In [8]:
LOGOS = ["Repetition", "Obfuscation, Intentional vagueness, Confusion", "Reasoning", "Justification"
        ,"Slogans", "Bandwagon", "Appeal to authority", "Flag-waving", "Appeal to fear/prejudice"
        ,"Simplification", "Causal Oversimplification", "Black-and-white Fallacy/Dictatorship", "Thought-terminating cliché"
        ,"Distraction","Misrepresentation of Someone's Position (Straw Man)", "Presenting Irrelevant Data (Red Herring)", "Whataboutism"]
ETHOS = ["Appeal to authority", "Glittering generalities (Virtue)", "Bandwagon", "Ad Hominem", "Transfer"
        ,"Doubt", "Name calling/Labeling", "Smears", "Reductio ad hitlerum", "Whataboutism"]
PATHOS = ["Exaggeration/Minimisation", "Loaded Language", "Appeal to (Strong) Emotions", "Appeal to fear/prejudice", "Flag-waving", "Transfer"]
JUSTIFICATION = ["Slogans", "Bandwagon", "Appeal to authority", "Flag-waving", "Appeal to fear/prejudice"]
REASONING = ["Simplification", "Distraction"]
SIMPLIFICATION = ["Causal Oversimplification", "Black-and-white Fallacy/Dictatorship", "Thought-terminating cliché"]
DISTRACTION = ["Misrepresentation of Someone's Position (Straw Man)", "Presenting Irrelevant Data (Red Herring)", "Whataboutism"]
ADHOMINEM = ["Doubt", "Name calling/Labeling", "Smears", "Reductio ad hitlerum", "Whataboutism"]

SUPERCLASSES = [LOGOS, ETHOS, PATHOS, JUSTIFICATION, REASONING, SIMPLIFICATION, DISTRACTION, ADHOMINEM]


In [9]:
CONVERSION_PTC_SEMEVAL = {}
CONVERSION_PTC_SEMEVAL["Exaggeration,Minimisation"] = "Exaggeration/Minimisation"
CONVERSION_PTC_SEMEVAL["Thought-terminating_Cliches"] = "Thought-terminating cliché"
CONVERSION_PTC_SEMEVAL["Flag-Waving"] = "Flag-waving"
CONVERSION_PTC_SEMEVAL["Appeal_to_fear-prejudice"] = "Appeal to fear/prejudice"
CONVERSION_PTC_SEMEVAL["Causal_Oversimplification"] = "Causal Oversimplification"
CONVERSION_PTC_SEMEVAL["Appeal_to_Authority"] = "Appeal to authority"
CONVERSION_PTC_SEMEVAL["Whataboutism,Straw_Men,Red_Herring"] = "Whataboutism" #here
CONVERSION_PTC_SEMEVAL["Bandwagon,Reductio_ad_hitlerum"] = "Reductio ad hitlerum" #here
CONVERSION_PTC_SEMEVAL["Loaded_Language"] = "Loaded Language"
CONVERSION_PTC_SEMEVAL["Black-and-White_Fallacy"] = "Black-and-white Fallacy/Dictatorship"
CONVERSION_PTC_SEMEVAL["Name_Calling,Labeling"] = "Name calling/Labeling"




# Data

In [10]:
def transformData_3way(train):
    #0: Logos, 1: Pathos, 2: Ethos
    train_h = []
    for i in range(0, len(train)):
        d = {}
        t = train[i]
        L = t['labels']
        L2 = []
        for tech in L:
            if tech in PATHOS:
                L2.append("Pathos")
            if tech in LOGOS:
                L2.append("Logos")
            if tech in ETHOS:
                L2.append("Ethos")
        L2 = list(set(L2))
        d['labels'] = L2
        d['text'] = t['text']
        d['id'] = t['id']
        train_h.append(d)
    return train_h

def transformData_1class(train):
    #0: Logos, 1: Pathos, 2: Ethos
    train_h = []
    for i in range(0, len(train)):
        d = {}
        t = train[i]
        L = t['labels']
        L2 = []
        if TECHNIQUES[0] in L:
            L2.append(TECHNIQUES[0])
        L2 = list(set(L2))
        d['labels'] = L2
        d['text'] = t['text']
        d['id'] = t['id']
        train_h.append(d)
    return train_h


def addSuperClasses(train):
    train_h = []
    for i in range(0, len(train)):
        d = {}
        t = train[i]
        L = t['labels']
        L2 = []
        for tech in L:
            L2.append(tech)
            if tech in PATHOS:
                L2.append("Pathos")
            if tech in LOGOS:
                L2.append("Logos")
            if tech in ETHOS:
                L2.append("Ethos")
            if tech in JUSTIFICATION:
                L2.append("Justification")
                L2.append("Logos")
            if tech in REASONING:
                L2.append("Reasoning")
                L2.append("Logos")
            if tech in SIMPLIFICATION:
                L2.append("Simplification")
                L2.append("Reasoning")
                L2.append("Logos")
            if tech in DISTRACTION:
                L2.append('Distraction')
                L2.append("Reasoning")
                L2.append("Logos")
            if tech in ADHOMINEM:
                L2.append('Ad Hominem')
                L2.append("Ethos")
        L2 = list(set(L2))
        d['labels'] = L2
        d['text'] = t['text']
        d['id'] = t['id']
        train_h.append(d)
    return train_h



In [11]:
path = '/path/to/semeval/data/'

In [11]:
with open(path+'train.json', 'r') as f:
    train = json.load(f)
with open(path+'validation.json', 'r') as f:
    validation = json.load(f)
with open(path+'dev_gold_labels/dev_subtask1_en.json', 'r') as f:
    dev = json.load(f)

NameError: name 'path' is not defined

In [13]:
with open(path+'2021_data/training_set_task1.txt', 'r') as f:
    old_train = json.load(f)
with open(path+'2021_data/test_set_task1.txt', 'r') as f:
    old_test = json.load(f)
with open(path+'2021_data/dev_set_task1.txt', 'r') as f:
    old_dev = json.load(f)

In [14]:
train_ptc = pd.read_csv(path+'PTC/datasets/train_sentences.csv')
dev_ptc = pd.read_csv(path+'PTC/datasets/dev_sentences.csv')

ptc_data = pd.concat([train_ptc, dev_ptc])

In [15]:
sents = ptc_data['sentences'].tolist()
props = ptc_data['propaganda'].tolist()

very_old_train = []
for i in range(0, len(props)):
    s = sents[i]
    p = props[i]
    p = p.replace("\'", '\"')
    p = json.loads(p)
    if p !=[]:
        d = {}
        pp = []
        for propaganda in p:
            p2 = propaganda
            if p2 not in TECHNIQUES:
                p2 = CONVERSION_PTC_SEMEVAL[propaganda]
            pp.append(p2)
        d['labels'] = pp
        d['text'] = s
        very_old_train.append(d)

In [92]:
with open(path+'0_augmented50.json', 'r') as f:
    red_herring = json.load(f)
with open(path+'1_augmented50.json', 'r') as f:
    obfuscation = json.load(f)
with open(path+'2_augmented50.json', 'r') as f:
    strawman = json.load(f)
with open(path+'3_augmented50.json', 'r') as f:
    thought = json.load(f)


In [93]:
augmented = []
problematic_conspiracies = [
    "Presenting Irrelevant Data (Red Herring)",
    "Obfuscation, Intentional vagueness, Confusion",
    "Misrepresentation of Someone's Position (Straw Man)",
    "Thought-terminating cliché"
]

for data_augmented in [red_herring, obfuscation, strawman, thought]:
    for text in data_augmented:
        d = {}
        d['text'] = text
        d['labels'] = [problematic_conspiracies[[red_herring, obfuscation, strawman, thought].index(data_augmented)]]
        augmented.append(d)
        

In [94]:
train = train+old_train+old_test+old_dev#+very_old_train
len(train)

8902

In [18]:
train = addSuperClasses(train)
validation = addSuperClasses(validation)
dev = addSuperClasses(dev)


In [26]:
train_text = []
train_labels = []
train_labels_binary = []
for i in range(0, len(train)):
    doc = train[i]
    train_text.append(doc['text'])
    
    l = doc['labels']
    L = [0 for i in range(0, len(TECHNIQUES))]
    for t in l:
        L[TECHNIQUES.index(t)] = 1
    train_labels.append(L)
    train_labels_binary.append(1 if sum(L)>0 else 0)
    
validation_text = []
validation_labels = []
validation_labels_binary = []
for i in range(0, len(validation)):
    doc = validation[i]
    validation_text.append(doc['text'])
    
    l = doc['labels']
    L = [0 for i in range(0, len(TECHNIQUES))]
    for t in l:
        L[TECHNIQUES.index(t)] = 1
    validation_labels.append(L)
    validation_labels_binary.append(1 if sum(L)>0 else 0)

dev_text = []
dev_labels = []
dev_labels_binary = []
for i in range(0, len(dev)):
    doc = dev[i]
    dev_text.append(doc['text'])
    
    l = doc['labels']
    L = [0 for i in range(0, len(TECHNIQUES))]
    for t in l:
        L[TECHNIQUES.index(t)] = 1
    dev_labels.append(L)
    dev_labels_binary.append(1 if sum(L)>0 else 0)


In [27]:
weights_tmp = [0 for i in range(0, len(TECHNIQUES))]
for i in range(0, len(TECHNIQUES)):
    for j in range(0, len(train_labels)):
        if train_labels[j][i]>0:
            weights_tmp[i]+=1

weights_techniques = torch.Tensor([[len(train_labels)/(len(train_labels)-w), len(train_labels)/w] for w in weights_tmp]).to('cuda')
weights_techniques

tensor([[  1.0081, 124.2344]], device='cuda:0')

In [31]:
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
#tokenizer = AutoTokenizer.from_pretrained('albert-base-v2')

tokenized_input = tokenizer(train_text)

m = 0
for tokens in tokenized_input['input_ids']:
    if len(tokens)>m:
        m=len(tokens)
m

375

In [32]:
MAX_LEN = 256 #128 < m some texts will be truncated

tokenized_train = tokenizer(train_text, max_length=MAX_LEN, padding='max_length', truncation=True)
tokenized_validation = tokenizer(validation_text, max_length=MAX_LEN, padding='max_length', truncation=True)
tokenized_dev = tokenizer(dev_text, max_length=MAX_LEN, padding='max_length', truncation=True)
    
train_input_ids, train_token_type_ids, train_attention_mask = tokenized_train['input_ids'], tokenized_train['token_type_ids'], tokenized_train['attention_mask']
validation_input_ids, validation_token_type_ids, validation_attention_mask = tokenized_validation['input_ids'], tokenized_validation['token_type_ids'], tokenized_validation['attention_mask']
dev_input_ids, dev_token_type_ids, dev_attention_mask = tokenized_dev['input_ids'], tokenized_dev['token_type_ids'], tokenized_dev['attention_mask']

train_token_type_ids = torch.tensor(train_token_type_ids)
validation_token_type_ids = torch.tensor(validation_token_type_ids)
dev_token_type_ids = torch.tensor(dev_token_type_ids)

# Convert to torch tensor
train_input_ids = torch.tensor(train_input_ids)
train_labels = torch.tensor(train_labels)
train_labels_binary = torch.tensor(train_labels_binary)
train_attention_mask = torch.tensor(train_attention_mask)
train_token_type_ids = torch.tensor(train_token_type_ids)

validation_input_ids = torch.tensor(validation_input_ids)
validation_labels = torch.tensor(validation_labels)
validation_labels_binary = torch.tensor(validation_labels_binary)
validation_attention_mask = torch.tensor(validation_attention_mask)
validation_token_type_ids = torch.tensor(validation_token_type_ids)

dev_input_ids = torch.tensor(dev_input_ids)
dev_labels = torch.tensor(dev_labels)
dev_labels_binary = torch.tensor(dev_labels_binary)
dev_attention_mask = torch.tensor(dev_attention_mask)
dev_token_type_ids = torch.tensor(dev_token_type_ids)


  train_token_type_ids = torch.tensor(train_token_type_ids)
  validation_token_type_ids = torch.tensor(validation_token_type_ids)
  dev_token_type_ids = torch.tensor(dev_token_type_ids)


In [33]:
batch_size = 16 #

train_data = TensorDataset(train_input_ids, train_attention_mask, train_labels, train_token_type_ids)
validation_data = TensorDataset(validation_input_ids, validation_attention_mask, validation_labels, validation_token_type_ids)
dev_data = TensorDataset(dev_input_ids, dev_attention_mask, dev_labels, dev_token_type_ids)

    
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)

dev_sampler = SequentialSampler(dev_data)
dev_dataloader = DataLoader(dev_data, sampler=dev_sampler, batch_size=batch_size)

# Model

In [34]:
class bert(nn.Module):
    
    def __init__(self, n_classes):
        super().__init__()
        self.n_classes = n_classes
        self.bert = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=n_classes)
        #self.bert = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=n_classes)
        
    def forward(self, input_ids, token_type_ids, input_mask):
        outputs = self.bert(input_ids = input_ids, token_type_ids = token_type_ids, attention_mask = input_mask)

        logits = outputs.logits
        
        return logits  

In [36]:
device="cuda"

model = bert(len(TECHNIQUES)*2) # *2 for CE loss
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

bert(
  (bert): BertForSequenceClassification(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
       

In [38]:
# Freeze layers
# for param in model.bert.bert.encoder.layer[:3].parameters():
#     param.requires_grad = False


In [41]:
#optimizer_grouped_parameters
optimizer = AdamW(model.parameters(),
                  lr=7e-5,
                  #lr=5e-6,
                  weight_decay = 0.01)

scheduler = ReduceLROnPlateau(optimizer, patience=4, factor=0.3)

In [42]:
criterions = []

for i in range(0, len(TECHNIQUES)):
    criterions.append(nn.CrossEntropyLoss(weight = weights_techniques[i]))


In [43]:
criterion = nn.BCEWithLogitsLoss(pos_weight=weights_techniques[:,1])

# Training

In [44]:
def convert_numbers_to_name(predictions_sep):
    PREDS = [[predictions_sep[i][k] for i in range(0, len(TECHNIQUES))] for k in range(0, len(predictions_sep[0]))]
    PREDS_NAMES = []
    for i in range(0, len(PREDS)):
        P = []
        for k in range(0, len(TECHNIQUES)):
            p = PREDS[i][k]
            if p==1:
                P.append(TECHNIQUES[k])
        PREDS_NAMES.append(P)
        
    return (PREDS_NAMES)

def evaluate_preds(predictions_sep, dev):
    PREDS_NAMES = convert_numbers_to_name(predictions_sep)
    
    prediction_dict = {}
    ground_truth_dict = {}
    for i in range(0, len(dev)):
        prediction_dict[dev[i]['id']] = PREDS_NAMES[i]
        ground_truth_dict[dev[i]['id']] = dev[i]['labels']
    
    p, r, f = evaluate_h(prediction_dict, ground_truth_dict)
    return (p, r, f)
    
    
    

In [None]:
epochs = 15

best_loss = 999
best_state_dict = model.state_dict()
best_epoch = 0
METRICS = []
best_f1h = 0
sig = nn.Sigmoid()

for e in trange(0, epochs):

    # Training
    print('Starting epoch ', e)
    model.train()
    
    tr_loss = 0
    nb_tr_examples, nb_tr_steps = 0, 0

    for step, batch in enumerate(train_dataloader):

        batch = tuple(t.to(device) for t in batch)

        b_input_ids, b_input_mask, b_labels, b_token_type_ids = batch
        
        b_labels = b_labels.float()
        optimizer.zero_grad()
        
        logits = model(b_input_ids, b_token_type_ids, b_input_mask) #.logits
        
        losses = []
        for i in range(0, len(TECHNIQUES)):
            logits_i = logits[:,2*i:2*i+2]
            labels_i = b_labels[:, i].long()
            loss_i = criterions[i](logits_i, labels_i)
            losses.append(loss_i)
        loss = sum(losses)
        
        #loss = criterion(logits, b_labels)
        
        
        #loss = criterion(logits, b_labels.long())
        loss.backward()
        optimizer.step()

        tr_loss += loss.item()
        nb_tr_examples += b_input_ids.size(0)
        nb_tr_steps += 1

    print("Train loss: {}".format(tr_loss/nb_tr_steps))
    
    
    # Testing
    
    model.eval()
    
    tweets_test = []
    
    predictions_sep = [[], [], [], [], [], [], [], [], [],[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]

    labels_sep = [[], [], [], [], [], [], [], [], [],[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
    
    eval_loss = 0
    steps=0
    for step, batch in enumerate(validation_dataloader):

        # Add batch to GPU
        batch = tuple(t.to(device) for t in batch)

        b_input_ids, b_input_mask, b_labels, b_token_type_ids = batch
            
        b_labels = b_labels.float()
        
        with torch.no_grad():

            logits = model(b_input_ids, b_token_type_ids, b_input_mask)
            losses = []
            for i in range(0, len(TECHNIQUES)):
                logits_i = logits[:,2*i:2*i+2]
                labels_i = b_labels[:, i].long()
                loss_i = criterions[i](logits_i, labels_i)
                losses.append(loss_i)
            loss = sum(losses)
            #loss = criterion(logits, b_labels.long())
            #loss = criterion(logits, b_labels)

        logits = logits.detach().cpu().numpy()
        ground_truth = b_labels.detach().cpu().numpy()
        
        steps+=1
        eval_loss+=loss.detach().item()
                
        for i in range(0, len(TECHNIQUES)):
            for p in logits:
                p_i = p[2*i:2*i+2]
                pred = np.argmax(p_i)
                predictions_sep[i].append(pred)
            #for p in logits:
            #    p_sig = sig(torch.Tensor(p))
            #    p_01 = (p_sig>0.5).int()
            #    predictions_sep[i].append(p_01[i].item())
                
            for l in ground_truth:
                labels_sep[i].append(l[i])
        #labels_sep[0].extend(b_labels.int().cpu().numpy())
        #predictions_sep[0].extend(logits.argmax(1))
        
    #scheduler.step(e)          
    scheduler.step(eval_loss/steps)
    LOSS = eval_loss/steps
    
    precision, recall, f1_h = evaluate_preds(predictions_sep, validation)
    F1s = []
    for i in range(0, len(TECHNIQUES)):
        F1s.append(round(metrics.f1_score(labels_sep[i], predictions_sep[i]), 3))
    #precision, recall, f1_h, support =  metrics.precision_recall_fscore_support(labels_sep[0], predictions_sep[0], average='macro')
    METRICS.append([precision, recall, f1_h, F1s])
    
    if f1_h> best_f1h:
        best_loss = LOSS
        best_state_dict = copy.deepcopy(model.state_dict())
        best_epoch = e
        best_f1h = f1_h
    
    print("\t Eval loss: {}".format(LOSS))
    print("\t Eval F1H: {}, PREH: {}, RECH: {}".format(round(f1_h, 3), round(precision, 3), round(recall, 3)))
    print("\t Eval F1s: {}".format(F1s))
    print("---"*25)
    print("\n")


  0%|          | 0/15 [00:00<?, ?it/s]

Starting epoch  0
Train loss: 0.3923068821795146
	 Eval loss: 0.3691267544636503
	 Eval F1H: 0.0, PREH: 0.0, RECH: 0.0
	 Eval F1s: [0.0]
---------------------------------------------------------------------------


Starting epoch  1
Train loss: 0.3708395712692253
	 Eval loss: 0.3494117090012878
	 Eval F1H: 0.0, PREH: 0.0, RECH: 0.0
	 Eval F1s: [0.0]
---------------------------------------------------------------------------


Starting epoch  2
Train loss: 0.3609117641820154
	 Eval loss: 0.36308928159996867
	 Eval F1H: 0.0, PREH: 0.0, RECH: 0.0
	 Eval F1s: [0.0]
---------------------------------------------------------------------------


Starting epoch  3
Train loss: 0.3476922157139965
	 Eval loss: 0.3867275263182819
	 Eval F1H: 0.0, PREH: 0.0, RECH: 0.0
	 Eval F1s: [0.0]
---------------------------------------------------------------------------


Starting epoch  4
Train loss: 0.346532984301298
	 Eval loss: 0.3903538975864649
	 Eval F1H: 0.0, PREH: 0.0, RECH: 0.0
	 Eval F1s: [0.0]
---

In [None]:
[m[2] for m in METRICS]

In [None]:
torch.save(best_state_dict, path+'best_model.pth')


# Inference

In [38]:
!ls /data/peskine/semeval2024/models/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 task1_AlbertW_merged_all_data_e9_0.598.pth
 task1_ALBERTW_merged_data__e13_0.551.pth
 task1_bert-base-uncased-finetuned-hateful-memeW_merged_all_data_e9_0.618.pth
 task1_BERT_casedW_merged_all_data_e8_0.578.pth
 task1_BERT_casedW_merged_data__e9_0.569.pth
 task1_BERT_merged_all_data+PTC_e6_0.566.pth
 task1_BERT_merged_data_BCE_unweighted_e9_0.58.pth
 task1_BERT_merged_data_BCE_weighted_e9_0.603.pth
 task1_BERT_merged_data_BCE_weighted_epoch15_lr3e5_e9_0.605.pth
 task1_BERT_merged_data_e8_0.627.pth
 task1_BERT_merged_data_inbetween_weights_e9_0.623.pth
 task1_BERT_merged_data_more_weights_e11_0.613.pth
 task1_BERT_merged_data_more_weights_freeze_3_e9_0.625.pth
 task1_BERT_merged_data+ptc_BCE_wei

In [41]:
model.load_state_dict(torch.load(path+'best_model.pth'))


bert(
  (bert): BertForSequenceClassification(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
       

In [46]:
model.eval()

predictions_sep = [[], [], [], [], [], [], [], [], [],[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]

labels_sep = [[], [], [], [], [], [], [], [], [],[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]

eval_loss = 0
steps=0
for step, batch in enumerate(dev_dataloader):

    # Add batch to GPU
    batch = tuple(t.to(device) for t in batch)

    b_input_ids, b_input_mask, b_labels, b_token_type_ids = batch

    b_labels = b_labels.float()

    with torch.no_grad():

        logits = model(b_input_ids, b_token_type_ids, b_input_mask)
        #logits = model(b_input_ids, b_token_type_ids, b_input_mask).logits



    logits = logits.detach().cpu().numpy()
    ground_truth = b_labels.detach().cpu().numpy()

    steps+=1

    for i in range(0, len(TECHNIQUES)):
        for p in logits:
            p_i = p[2*i:2*i+2]
            pred = np.argmax(p_i)
            predictions_sep[i].append(pred)
        #for p in logits:
        #    p_sig = sig(torch.Tensor(p))
        #    p_01 = (p_sig>0.5).int()
        #    predictions_sep[i].append(p_01[i].item())

        for l in ground_truth:
            labels_sep[i].append(l[i])


precision, recall, f1_h = evaluate_preds(predictions_sep, dev)
F1s = []
for i in range(0, len(TECHNIQUES)):
    F1s.append(round(metrics.f1_score(labels_sep[i], predictions_sep[i]), 3))


print("\t Eval loss: {}".format(LOSS))
print("\t Eval F1H: {}, PREH: {}, RECH: {}".format(round(f1_h, 3), round(precision, 3), round(recall, 3)))
print("\t Eval F1s: {}".format(F1s))
print("---"*25)
print("\n")

	 Eval loss: 28.09398991721017
	 Eval F1H: 0.597, PREH: 0.574, RECH: 0.622
	 Eval F1s: [0.283, 0.0, 0.428, 0.273, 0.822, 0.437, 0.195, 0.196, 0.352, 0.197, 0.105, 0.0, 0.273, 0.389, 0.241, 0.55, 0.532, 0.2, 0.39, 0.604, 0.708, 0.514, 0.666, 0.455, 0.279, 0.774, 0.685, 0.671]
---------------------------------------------------------------------------




In [47]:
METRICS = [f1_h, precision, recall]
METRICS.extend(F1s)
METRICS_STR = []
for m in METRICS:
    METRICS_STR.append(str(round(m, 3)))

"%".join(METRICS_STR)

'0.597%0.574%0.622%0.283%0.0%0.428%0.273%0.822%0.437%0.195%0.196%0.352%0.197%0.105%0.0%0.273%0.389%0.241%0.55%0.532%0.2%0.39%0.604%0.708%0.514%0.666%0.455%0.279%0.774%0.685%0.671'

In [36]:
for i in range(0, len(TECHNIQUES)):
    print(TECHNIQUES[i], ':', F1s[i])

Repetition : 0.463
Obfuscation, Intentional vagueness, Confusion : 0.0
Slogans : 0.462
Bandwagon : 0.286
Appeal to authority : 0.835
Flag-waving : 0.503
Appeal to fear/prejudice : 0.341
Causal Oversimplification : 0.101
Black-and-white Fallacy/Dictatorship : 0.422
Thought-terminating cliché : 0.306
Misrepresentation of Someone's Position (Straw Man) : 0.111
Presenting Irrelevant Data (Red Herring) : 0.0
Whataboutism : 0.326
Glittering generalities (Virtue) : 0.541
Doubt : 0.355
Name calling/Labeling : 0.575
Smears : 0.535
Reductio ad hitlerum : 0.133
Exaggeration/Minimisation : 0.423
Loaded Language : 0.627


# Generate submission file

In [None]:
PREDS_NAMES = convert_numbers_to_name(predictions_sep)
submission = []
for i in range(0, len(dev)):
    d = {}
    d['id'] = dev[i]['id']
    d['labels'] = PREDS_NAMES[i]
    submission.append(d)

In [None]:
with open(path+'submission_test_v2.txt', 'w') as f:
    json.dump(submission, f)