# Multitask Hierachical Neural Network for Persuasion Techniques Detection

This is a solution of kb team for Semeval 2023 task 3 subtask 3

In [2]:
import random
import os
import numpy as np
import torch
def seed_everything(seed=73):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # some cudnn methods can be random even after fixing the seed unless you tell it to be deterministic
    torch.backends.cudnn.deterministic = True

seed_everything(1234)


In [None]:
lang="en" #set language

## READ input data and spans

In [8]:
import pandas as pd

In [9]:
from tqdm import tqdm

def make_dataframe_subtask3(input_folder, labels_fn=None, spans=None):
    txt_files = [f for  f in os.listdir(spans) if f.endswith('.txt')]
    print("number of files: ", len(txt_files))
    # print()
    df_labels = pd.DataFrame(columns=["id","label", "start", "end"])
    # print(txt_files)
    for i, js in enumerate(txt_files):
        # print(js
        with open(os.path.join(spans, js)) as file:
            line=file.readline()
            # print(line)
            while line!="":
                l=line.split()
                # print(l[0])
                df_labels=df_labels.append({"id":l[0], "label":l[1], "start":l[2], "end":l[3]}, ignore_index=True)
                line=file.readline()

    #MAKE TXT DATAFRAME
    text = pd.DataFrame()
    articles=[]
    for fil in tqdm(filter(lambda x: x.endswith('.txt') and x.startswith("art"), os.listdir(input_folder))):
        iD = fil[7:].split('.')[0]
        # print("------------------------------------------")
        # print(fil)
        art_labels=df_labels[df_labels["id"]==iD]
        art_labels["start"]=art_labels["start"].astype(int)
        art_labels["end"]=art_labels["end"].astype(int)

        article=open(input_folder+fil,'r', encoding="utf-8", errors='ignore').read()
        # print(article)
        for row in art_labels.iterrows():
            start= int(row[1]["start"])#-12
            end= int(row[1]["end"])#-12
            # print(article[start : end], row[1]["label"])
        lines = list(enumerate(open(input_folder+fil,'r', encoding="utf-8", errors='ignore').read().splitlines(),1))
        start_line=0
        end_line=0
        start_ends=()
        for line in lines:
            # print(line[1])
            start_line=end_line
            end_line=end_line+len(line[1])
            # print(start_line)
            od_do=list()
            for span in art_labels[(art_labels.start>=start_line)&(art_labels.end<end_line)].iterrows():
                od=span[1].start-start_line
                do=span[1].end-start_line
                od_do.append({"start":od, "end":do, "label":span[1]["label"]})
            text=text.append({"id": iD, "line":line[0], "text":line[1], "spans":od_do}, ignore_index=True)
            
                
        # text.extend([(iD,) + line for line in lines])
    print(text)
    df_text = pd.DataFrame(text, columns=['id','line','text', "spans"])
    df_text.id = df_text.id.apply(int)
    df_text.line = df_text.line.apply(int)
    df_text = df_text[df_text.text.str.strip().str.len() > 0].copy()
    df_text = df_text.set_index(['id','line'])
    
    df = df_text

    if labels_fn:
        #MAKE LABEL DATAFRAME
        labels = pd.read_csv(labels_fn,sep='\t',encoding='utf-8',header=None)
        labels = labels.rename(columns={0:'id',1:'line',2:'labels'})
        labels = labels.set_index(['id','line'])
        labels = labels[labels.labels.notna()].copy()

        #JOIN
        df = labels.join(df_text)[['text','spans','labels']]
    return df

In [None]:
labels_test_fn="/data/en/dev-labels-subtask-3.txt"
folder_dev="/data/en/dev-articles-subtask-3/"
labels_train_fn="/data/en/train-labels-subtask-3.txt"
folder_train="/data/en/train-articles-subtask-3/"
train_span="/data/"+lang+"/train-labels-subtask-3-spans"
test_span="/data/"+lang+"/dev-labels-subtask-3-spans"

print('Loading training...')
train=make_dataframe_subtask3(folder_train, labels_train_fn, train_span)
print('Loading dev...')
test=make_dataframe_subtask3(folder_dev, labels_test_fn, test_span)

In [13]:
train.reset_index(inplace=True)
test.reset_index(inplace=True)

train

Unnamed: 0,id,line,text,spans,labels
0,111111111,3,Geneva - The World Health Organisation chief o...,"[{'start': 90, 'end': 98, 'label': 'Doubt'}]",Doubt
1,111111111,5,"""The next transmission could be more pronounce...","[{'start': 5, 'end': 63, 'label': 'Appeal_to_A...",Appeal_to_Authority
2,111111111,13,"But Tedros voiced alarm that ""plague in Madaga...","[{'start': 74, 'end': 96, 'label': 'Repetition'}]",Repetition
3,111111111,17,He also pointed to the presence of the pneumon...,[],Appeal_to_Fear-Prejudice
4,111111111,19,He praised the rapid response from WHO and Mad...,[],Appeal_to_Fear-Prejudice
...,...,...,...,...,...
3755,999001970,4,Also the Left killed comedy. This is what its ...,"[{'start': 8, 'end': 30, 'label': 'Slogans'}]","Exaggeration-Minimisation,Slogans"
3756,999001970,5,Saturday Night Live writer and comedian Nimesh...,[],Exaggeration-Minimisation
3757,999001970,6,That's what Columbia snowflakes thought was of...,"[{'start': 17, 'end': 36, 'label': 'Name_Calli...",Name_Calling-Labeling
3758,999001970,8,"Comrades, these jokes you have been listening ...",[],"Exaggeration-Minimisation,Name_Calling-Labeling"


## Change span to IO

In [None]:

import spacy


#add spacy model for chosen language
if lang=="po":
    nlp = spacy.load("pl_core_news_sm")
if lang=="en":
    nlp = spacy.load("en_core_web_sm")
    
train["tokens"]=""
train["pos"]=""
train["mani_tags"]=""




#train
for i, (sentence, annotations) in enumerate(zip(train["text"],train["spans"])):
    doc=nlp(sentence)
    token_list=[]
    mani_tag=[]
    
    for token in doc:
        token_list.append(str(token.text))
#         print(token)
        pos_list.append(str(token.pos_))


    train["tokens"][i]=token_list
    train["pos"][i]=pos_list
    
    if len(annotations)==0:
        
        for j in range(0,len(token_list)):
            mani_tag.append("O")
        
        train["mani_tags"][i]=mani_tag
    else:
        start=0
        ann_mani=pd.DataFrame(annotations)
        ann_mani= ann_mani.sort_values("start")
   
        for j, ann in ann_mani.iterrows():
            token_idx=0
            for token in nlp(sentence[start:ann["start"]].strip()):
    #                 
                    mani_tag.append("O")
            for token in nlp(sentence[ann["start"]:ann["end"]].strip()):
                start=ann["end"]
                if(token_idx==0):
                    mani_tag.append("I")
                else:
                    mani_tag.append("I")

                token_idx+=1
            for token in nlp(sentence[ann["end"]:].strip()):
                    mani_tag.append("O")
        train["mani_tags"][i]=mani_tag

 

In [None]:
test["tokens"]=""
test["pos"]=""
test["mani_tags"]=""

#test
for i, (sentence, annotations) in enumerate(zip(test["text"],test["spans"])):
    doc=nlp(sentence)
    token_list=[]
    mani_tag=[]
    
    for token in doc:
        token_list.append(str(token.text))

        pos_list.append(str(token.pos_))


    test["tokens"][i]=token_list
    test["pos"][i]=pos_list
    
    if len(annotations)==0:
        
        for j in range(0,len(token_list)):
            mani_tag.append("O")
        
        test["mani_tags"][i]=mani_tag
    else:
        start=0
        ann_mani=pd.DataFrame(annotations)
        ann_mani= ann_mani.sort_values("start")
   
        for j, ann in ann_mani.iterrows():
            token_idx=0
            for token in nlp(sentence[start:ann["start"]].strip()):
                    mani_tag.append("O")
            for token in nlp(sentence[ann["start"]:ann["end"]].strip()):
                start=ann["end"]
                if(token_idx==0):
                    mani_tag.append("I-")
                else:
                    mani_tag.append("I-")

                token_idx+=1
            for token in nlp(sentence[ann["end"]:].strip()):
                    mani_tag.append("O")
        test["mani_tags"][i]=mani_tag


In [24]:

import pandas as pd 

frames_list=[]
for frames in train["labels"]:
  fs=frames.split(",")
  for f in fs:
    frames_list.append(f)

frames_to_ids = {k: v for v, k in enumerate(set(frames_list))}
ids_to_frames = {v: k for v, k in enumerate(set(frames_list))}
frames_to_ids

{'False_Dilemma-No_Choice': 0,
 'Guilt_by_Association': 1,
 'Loaded_Language': 2,
 'Flag_Waving': 3,
 'Obfuscation-Vagueness-Confusion': 4,
 'Red_Herring': 5,
 'Appeal_to_Authority': 6,
 'Whataboutism': 7,
 'Doubt': 8,
 'Appeal_to_Popularity': 9,
 'Conversation_Killer': 10,
 'Causal_Oversimplification': 11,
 'Name_Calling-Labeling': 12,
 'Appeal_to_Fear-Prejudice': 13,
 'Straw_Man': 14,
 'Slogans': 15,
 'Repetition': 16,
 'Appeal_to_Hypocrisy': 17,
 'Exaggeration-Minimisation': 18}

In [None]:
from collections import Counter
tags=[]
for tag in train["mani_tags"]:
    tags=tags+tag

    

print("Number of tags: {}".format(len(set(tags))))
c = Counter(tags)

print( c.items())



tags_to_ids = {k: v for v, k in enumerate(set(tags))}
ids_to_tags = {v: k for v, k in enumerate(set(tags))}
tags_to_ids

In [26]:
from tqdm.notebook import tqdm
n_labels=len(frames_to_ids)

def one_hot_encoder(df):
    one_hot_encoding = []
    for i in tqdm(range(len(df))):
        temp = [0]*n_labels
        label_indices = df.iloc[i]["labels"].split(",")
        for index in label_indices:
            temp[frames_to_ids[index]] = 1
        one_hot_encoding.append(temp)
    return pd.DataFrame(one_hot_encoding)

In [27]:
train_ohe_labels = one_hot_encoder(train)
test_ohe_labels = one_hot_encoder(test)

train_ohe_labels

  0%|          | 0/3760 [00:00<?, ?it/s]

  0%|          | 0/1120 [00:00<?, ?it/s]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3755,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1
3756,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3757,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3758,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1


In [28]:
train = pd.concat([train, train_ohe_labels], axis=1)
train

Unnamed: 0,id,line,text,spans,labels,tokens,pos,mani_tags,0,1,...,9,10,11,12,13,14,15,16,17,18
0,111111111,3,Geneva - The World Health Organisation chief o...,"[{'start': 90, 'end': 98, 'label': 'Doubt'}]",Doubt,"[Geneva, -, The, World, Health, Organisation, ...","[NOUN, PUNCT, PROPN, PROPN, ADJ, NOUN, VERB, P...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0,...,0,0,0,0,0,0,0,0,0,0
1,111111111,5,"""The next transmission could be more pronounce...","[{'start': 5, 'end': 63, 'label': 'Appeal_to_A...",Appeal_to_Authority,"["", The, next, transmission, could, be, more, ...","[PUNCT, NOUN, ADJ, NOUN, NOUN, X, NOUN, NOUN, ...","[O, O, I-, I-, I-, I-, I-, I-, I-, I-, I-, I-,...",0,0,...,0,0,0,0,0,0,0,0,0,0
2,111111111,13,"But Tedros voiced alarm that ""plague in Madaga...","[{'start': 74, 'end': 96, 'label': 'Repetition'}]",Repetition,"[But, Tedros, voiced, alarm, that, "", plague, ...","[X, PROPN, ADJ, NOUN, VERB, PUNCT, NOUN, X, PR...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0,...,0,0,0,0,0,0,0,1,0,0
3,111111111,17,He also pointed to the presence of the pneumon...,[],Appeal_to_Fear-Prejudice,"[He, also, pointed, to, the, presence, of, the...","[NOUN, NOUN, NOUN, AUX, X, NOUN, X, X, NOUN, N...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0,...,0,0,0,0,1,0,0,0,0,0
4,111111111,19,He praised the rapid response from WHO and Mad...,[],Appeal_to_Fear-Prejudice,"[He, praised, the, rapid, response, from, WHO,...","[PROPN, PROPN, CCONJ, NOUN, NOUN, NOUN, PROPN,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3755,999001970,4,Also the Left killed comedy. This is what its ...,"[{'start': 8, 'end': 30, 'label': 'Slogans'}]","Exaggeration-Minimisation,Slogans","[Also, the, Left, killed, comedy, ., This, is,...","[NOUN, X, PROPN, NOUN, ADV, PUNCT, NOUN, X, AD...","[O, O, I-, I-, I-, I-, I-, O, O, O, O, O, O, O...",0,0,...,0,0,0,0,0,0,1,0,0,1
3756,999001970,5,Saturday Night Live writer and comedian Nimesh...,[],Exaggeration-Minimisation,"[Saturday, Night, Live, writer, and, comedian,...","[NOUN, NOUN, VERB, NOUN, CCONJ, NOUN, PROPN, N...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0,...,0,0,0,0,0,0,0,0,0,1
3757,999001970,6,That's what Columbia snowflakes thought was of...,"[{'start': 17, 'end': 36, 'label': 'Name_Calli...",Name_Calling-Labeling,"[That's, what, Columbia, snowflakes, thought, ...","[PROPN, NOUN, NOUN, NOUN, NUM, PRON, VERB, PUN...","[O, O, O, I-, I-, I-, O, O, O, O, O, O, O, O, ...",0,0,...,0,0,0,1,0,0,0,0,0,0
3758,999001970,8,"Comrades, these jokes you have been listening ...",[],"Exaggeration-Minimisation,Name_Calling-Labeling","[Comrades, ,, these, jokes, you, have, been, l...","[PROPN, PUNCT, VERB, NOUN, X, NOUN, ADJ, NOUN,...","[O, O, O, O, O, O, O, O, O, O, O, O, O]",0,0,...,0,0,0,1,0,0,0,0,0,1


In [29]:
test = pd.concat([test, test_ohe_labels], axis=1)
test

Unnamed: 0,id,line,text,spans,labels,tokens,pos,mani_tags,0,1,...,9,10,11,12,13,14,15,16,17,18
0,813452859,7,Michael Swadling: I guess her only chance is i...,"[{'start': 84, 'end': 106, 'label': 'Loaded_La...","False_Dilemma-No_Choice,Loaded_Language","[Michael, Swadling, :, I, guess, her, only, ch...","[PROPN, PROPN, PUNCT, CCONJ, NOUN, PUNCT, ADJ,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",1,0,...,0,0,0,0,0,0,0,0,0,0
1,813452859,9,There is a chance; as unfortunately there are ...,"[{'start': 133, 'end': 226, 'label': 'False_Di...","False_Dilemma-No_Choice,Loaded_Language,Name_C...","[There, is, a, chance, ;, as, unfortunately, t...","[PROPN, CCONJ, CCONJ, NOUN, PUNCT, NOUN, NOUN,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",1,0,...,0,0,0,1,0,0,0,0,0,0
2,813452859,11,Michael Swadling: The EU withdrawal act is in ...,[],Conversation_Killer,"[Michael, Swadling, :, The, EU, withdrawal, ac...","[PROPN, PROPN, PUNCT, PROPN, PROPN, NOUN, NOUN...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0,...,0,1,0,0,0,0,0,0,0,0
3,813452859,12,I often use the example of an iPhone to people...,[],"Conversation_Killer,Red_Herring","[I, often, use, the, example, of, an, iPhone, ...","[CCONJ, NOUN, DET, X, ADV, X, X, PROPN, PRON, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0,...,0,1,0,0,0,0,0,0,0,0
4,813452859,15,Michael Swadling: The EU makes a profit on its...,[],Obfuscation-Vagueness-Confusion,"[Michael, Swadling, :, The, EU, makes, a, prof...","[PROPN, PROPN, PUNCT, PROPN, PROPN, VERB, CCON...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1115,833053676,7,“I don’t think he’s legitimate.,[],Doubt,"[“, I, don, ’, t, think, he, ’, s, legitimate, .]","[PUNCT, X, X, PROPN, PRON, NOUN, PROPN, NOUN, ...","[O, O, O, O, O, O, O, O, O, O, O]",0,0,...,0,0,0,0,0,0,0,0,0,0
1116,833067493,4,"'Democrats, the test results are back, and Don...","[{'start': 4, 'end': 72, 'label': 'Loaded_Lang...",Loaded_Language,"[', Democrats, ,, the, test, results, are, bac...","[PUNCT, PROPN, PUNCT, CCONJ, NOUN, ADV, PROPN,...","[O, O, I-, I-, I-, I-, I-, I-, I-, I-, I-, I-,...",0,0,...,0,0,0,0,0,0,0,0,0,0
1117,833067493,5,Trump Jr added in a comment: '... and your pre...,[],Loaded_Language,"[Trump, Jr, added, in, a, comment, :, ', .., ....","[NOUN, X, NOUN, NOUN, CCONJ, NOUN, PUNCT, PUNC...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0,...,0,0,0,0,0,0,0,0,0,0
1118,833067493,8,Trump Jr (seen last year) mocked Democrats say...,[],Loaded_Language,"[Trump, Jr, (, seen, last, year, ), mocked, De...","[NOUN, PROPN, PUNCT, NOUN, X, PROPN, PUNCT, PR...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
def inspect_category_wise_data(label, n=5):
    samples = train[train[label] == 1].sample(n)
    sentiment = ids_to_frames[label]
    
    print(f"{n} samples from {sentiment} sentiment: \n")
    for text in samples["text"]:
        print(text, end='\n\n')

inspect_category_wise_data(4)


5 samples from Obfuscation-Vagueness-Confusion sentiment: 

The cardinal’s staff asked that CNA change his answer about liturgically “blessing” gay unions to: “There are no general solutions and I think that would not be right, because we are talking about pastoral care for individual cases, and that applies to other areas as well, which we cannot regulate, where we have no sets of rules.”

What is true for Corker is doubly true for the Democrats.

The case of Amber Guyger, the police officer who admitted to shooting 26-year-old Botham Jean in his apartment for no reason, is a glaring example of “blue privilege” at work in our society. Guyger claims that she accidentally entered the wrong apartment and shot a man who she thought was a burglar, however, eyewitness testimony has contradicted her initial statement.

The cardinal’s office maintains that rather than saying “yes,” there is a possibility of liturgical “blessing” of gay unions, he answered the question in a more subtle way wit

In [35]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [36]:
if lang=="en":
  BERT_MODEL = "bert-base-cased"
  BERT_MODEL = "/mnt/lun2/kbaraniak/data/tmpen4/test-mlm"
if lang=="po":
  BERT_MODEL = "/mnt/lun2/kbaraniak/data/tmp3/test-mlm"
  #BERT_MODEL = "dkleczek/bert-base-polish-uncased-v1"
if lang=="fr":
  BERT_MODEL = "dbmdz/bert-base-french-europeana-cased"#"camembert/camembert-base"
if lang=="it":
  BERT_MODEL = "dbmdz/bert-base-italian-uncased"#
if lang=="ru":
  BERT_MODEL = "DeepPavlov/bert-base-bg-cs-pl-ru-cased"#"rubert"
if lang=="ge":
  BERT_MODEL = "dbmdz/bert-base-german-uncased"#


from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL,local_files_only=True, cache_dir="/mnt/lun2/kbaraniak/data/")

In [38]:
import transformers


## Model

In [39]:
from torch import nn

class PropagandaClassifier(nn.Module):
    def __init__(self, n_classes,num_labels, do_prob, bert_model):
        super(PropagandaClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(bert_model,local_files_only=True, cache_dir="/mnt/lun2/kbaraniak/data/")
        

        self.dropout = nn.Dropout(do_prob)
        self.out = nn.Linear(768, n_classes)
        
        self.dropout2 = nn.Dropout(do_prob)
        self.tagger = nn.Linear(768, num_labels)
        self.m=nn.Softmax( dim=2)
       

    def forward(self, input_ids, attention_mask):
        output_bert = self.bert(input_ids, attention_mask=attention_mask)
        
        
        #tokens
        output_tag1=self.dropout(output_bert[0])
        output_tag=self.tagger(output_tag1)
       
        softm=self.m(output_tag)
        
        indexes=torch.argmax(softm, axis=2)
        
        ind=[]
        for i in range(0, indexes.shape[0]):
            one=False
            for j in range(0, indexes.shape[1]):
                
                if indexes[i,j]==1:
                    ind.append(j)
                    one=True
                    break
            if one==False: #jesli brak 1 to tez chcemy miec index
                ind.append(0)
    
        a=torch.range(0,len(indexes)-1,dtype=torch.long)
        
        output_1=output_bert[0][a,ind, :]
        output_2 = self.dropout(output_1)
        output = self.out(output_2)
        return output, output_tag

In [40]:
from transformers import BertTokenizerFast, BertConfig, BertModel




## Prepare dataset

In [41]:
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer
import numpy as np


class MyDataset(Dataset):

  def __init__(self,df, tokenizer, max_len):
    
    self.article=df["text"]#[t.lower() for t in df["text"]]
    

    self.tokenizer=tokenizer
    self.max_len=max_len
    self.id= df["id"]
    self.line=df["line"]

    if "labels" in df.columns:
       self.labels=df[range(len(frames_to_ids))].values.tolist()
    else:
      self.labels=[]
    self.mani_tags=[]
    if "mani_tags" in df.columns:
        self.mani_tags=df["mani_tags"]
  def __len__(self):
    return len(self.article)
  
  def __getitem__(self,idx):
    if len(self.labels)>0:
      labels=self.labels[idx]
    else:
      labels=0
    if len( self.mani_tags)>0:
        token_word_labels = self.mani_tags[idx][0].split(",") 
        token_labels = [tags_to_ids[label] for label in token_word_labels] 
    else:
        token_labels=[]
    idart=self.id[idx]
    line=self.line[idx]
     # print(labels)
    encoding = self.tokenizer(self.article[idx],
                             is_split_into_words=False,
                             #is_pretokenized=True, 
                             return_offsets_mapping=True, 
                             padding='max_length', 
                             truncation=True, 
                             max_length=self.max_len)#.set_format("torch")

    # create an empty array of -100 of length max_length
    encoded_labels = np.ones(len(encoding["offset_mapping"]), dtype=int) * -100
        
    i = -1
    if len( self.mani_tags)>0:
        for idx, mapping in enumerate(encoding["offset_mapping"]):
    
                if mapping[1]!=0:# next
                    if mapping[0] == 0:#only if begginign of a word
                        i += 1
                    encoded_labels[idx] = token_labels[i]

    

   
    

    items = {key: torch.as_tensor(val) for key, val in encoding.items()}
    items["labels"]=torch.as_tensor(labels) 
    items["id"]=idart
    items["line"]=line
    items['token_labels'] = torch.as_tensor(encoded_labels)


    
    return items

In [42]:
#setup

MAX_LEN = 256
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 8
EPOCHS = 30
LEARNING_RATE = 1e-05
MAX_GRAD_NORM = 10



## train_test split

In [43]:
print("TRAIN Dataset: {}".format(train.shape))
print("TEST Dataset: {}".format(test.shape))
training_set = MyDataset(train, tokenizer, MAX_LEN)
test_set = MyDataset(test, tokenizer, MAX_LEN)

# # myDs=MyDataset(bias_lexical, tokenizer)
train_loader=DataLoader(training_set,batch_size=TRAIN_BATCH_SIZE,shuffle=True)
test_loader=DataLoader(test_set,batch_size=VALID_BATCH_SIZE, shuffle=False)


TRAIN Dataset: (3760, 27)
TEST Dataset: (1120, 27)


## log metrics

In [45]:
from sklearn import metrics, model_selection, preprocessing
from sklearn.metrics import precision_recall_fscore_support as score


def log_metrics(preds, labels):
    preds = torch.stack(preds)
    # print(preds)
    preds = preds.cpu().detach().numpy()
    # print(preds)
    labels = torch.stack(labels)
    labels = labels.cpu().detach().numpy()


    preds=preds >0.5
    class_rep=classification_report( labels, preds, target_names= frames_to_ids.keys())
    print(class_rep)
    precision,recall,fscore,support=score(labels, preds,average='micro')
    precision,recall,fscore_macro,support=score(labels, preds,average='macro')

    return {"f1_micro":fscore, "f1_macro":fscore_macro}

In [46]:
from transformers import BertTokenizerFast, BertConfig, BertModel
from torch.nn import CrossEntropyLoss
# from transformers.models.bert.modeling_bert import BertModel
from transformers.models.bert import BertPreTrainedModel
from sklearn.metrics import classification_report




def training(epoch, test=True):
    tr_loss, tr_accuracy = 0, 0
    nb_tr_examples, nb_tr_steps = 0, 0
    tr_preds, tr_labels = [], []
    # put model in training mode
    model.train()
    print("Start")
    for idx, batch in enumerate(train_loader):

       

        ids = batch['input_ids'].to(device, dtype = torch.long)
        mask = batch['attention_mask'].to(device, dtype = torch.long)
        labels = batch['labels'].to(device, dtype = torch.long)
        tag_labels=batch['token_labels'].to(device, dtype = torch.long)

        optimizer.zero_grad()

        
        output, output_tokens = model(input_ids=ids, attention_mask=mask)
        
        loss_sequence = loss_fct(output, labels.float())
        loss_tokens = loss_fct2(output_tokens.view(-1, len(tags_to_ids)), tag_labels.view(-1))

        loss=loss_sequence+loss_tokens*0.5
        tr_loss += loss.item()
        
        tr_logits=output

        nb_tr_steps += 1
        nb_tr_examples += labels.size(0)
        
        if idx % 100==0:
            loss_step = tr_loss/nb_tr_steps
            print(f"Training loss per 100 training steps: {loss_step}")
           
    
        # gradient clipping
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=MAX_GRAD_NORM
        )
        loss.backward()

        optimizer.step()

        

    epoch_loss = tr_loss / nb_tr_steps
    tr_accuracy = tr_accuracy / nb_tr_steps
    print(f"Training loss epoch: {epoch_loss}")
    print(f"Training accuracy epoch: {tr_accuracy}")

    
    print("Training eval")
    model.eval()
    tr_preds, tr_labels = [], []
    loss=0
    for batch in train_loader:
      
      ids = batch['input_ids'].to(device, dtype = torch.long)
      mask = batch['attention_mask'].to(device, dtype = torch.long)
      labels = batch['labels'].to(device, dtype = torch.long)
      tag_labels=batch['token_labels'].to(device, dtype = torch.long)

      with torch.no_grad():
          output, output_tokens = model(input_ids=ids, attention_mask=mask,)
          loss+=loss_fct(output, labels.float())
          

          preds=torch.sigmoid(output)>0.5
          tr_labels +=[lab.cpu() for lab in labels ]
          tr_preds+=[lab for lab in preds ]
         
    
    loss_train=loss/len(train_loader)
    print(loss_train)
    res=log_metrics(tr_preds, tr_labels)

    print("Test eval")
    model.eval()
    tr_preds, tr_labels = [], []
    loss=0
    if test:
      for batch in test_loader:
        ids = batch['input_ids'].to(device, dtype = torch.long)
        mask = batch['attention_mask'].to(device, dtype = torch.long)
        labels = batch['labels'].to(device, dtype = torch.long)
        tag_labels=batch['token_labels'].to(device, dtype = torch.long)

        
        with torch.no_grad():
            
            output, output_tokens = model(input_ids=ids, attention_mask=mask,)
            loss+=loss_fct(output, labels.float())
            preds=torch.sigmoid(output)>0.5
            tr_labels.extend(labels)
            tr_preds.extend(preds)
            
      loss_test=loss/len(test_loader)
      res_test=log_metrics(tr_preds, tr_labels)
      print(loss_test)
      return loss_train, loss_test, res["f1_micro"], res["f1_macro"], res_test["f1_micro"], res_test["f1_macro"]
    else:
      return loss_train, 0, res["f1_micro"],res["f1_macro"], 0,0



    

In [47]:
class_weights=[]
for i in range(len(frames_to_ids)):
    class_weights.append((len(train)-sum(train[i]))/sum(train[i]))
class_weights

[29.81967213114754,
 62.728813559322035,
 1.078496406854616,
 12.101045296167248,
 207.88888888888889,
 84.45454545454545,
 23.415584415584416,
 234.0,
 6.258687258687258,
 249.66666666666666,
 40.31868131868132,
 16.652582159624412,
 2.840653728294178,
 11.129032258064516,
 249.66666666666666,
 23.575163398692812,
 5.911764705882353,
 93.0,
 7.068669527896995]

## Training

In [112]:
train_loss_history=[]
test_loss_history=[]
train_f1micro_history=[]
test_f1micro_history=[]

model= PropagandaClassifier(len(frames_to_ids),len(tags_to_ids), 0.1, BERT_MODEL)
model=model.to(device)


optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE, weight_decay=0.01) # AdamW

class_weights=torch.as_tensor(class_weights).to(device, dtype = torch.float)

loss_fct =  nn.BCEWithLogitsLoss(pos_weight=class_weights)

loss_fct2 =  nn.CrossEntropyLoss()



for epoch in range(EPOCHS):
    print(f"Training epoch: {epoch + 1}")
    loss_tr, loss_te, f1_tr, f1_te=training(epoch)
    print(loss_tr, loss_te, f1_tr, f1_te)
    train_loss_history.append(loss_tr)
    test_loss_history.append(loss_te)
    train_f1micro_history.append(f1_tr)
    test_f1micro_history.append(f1_te)

Some weights of the model checkpoint at /mnt/lun2/kbaraniak/data/tmpen4/test-mlm were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at /mnt/lun2/kbaraniak/data/tmpen4/test-mlm and are newly initialized: ['bert.pooler.de

Training epoch: 1
Start




Training loss per 100 training steps: 1.779594898223877
Training loss per 100 training steps: 1.3159680360614663
Training loss per 100 training steps: 1.306007193392189
Training loss per 100 training steps: 1.3131451388926205
Training loss per 100 training steps: 1.3072991990983636
Training loss epoch: 1.3128823470562063
Training accuracy epoch: 0.0
Training eval
tensor(1.2160, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.25      0.32      0.28       518
Obfuscation-Vagueness-Confusion       0.01      0.83      0.02        18
           Appeal_to_Popularity       0.00      1.00      0.01        15
      Exaggeration-Minimisation       0.15      0.60      0.24       466
            Conversation_Killer       0.03      0.82      0.07        91
                   Whataboutism       0.02      0.88      0.04        16
                        Slogans       0.12      0.31      0.18       153
           Guilt_



                                 precision    recall  f1-score   support

                          Doubt       0.29      0.18      0.22       187
Obfuscation-Vagueness-Confusion       0.02      0.38      0.03        13
           Appeal_to_Popularity       0.03      0.88      0.06        34
      Exaggeration-Minimisation       0.11      0.82      0.19       115
            Conversation_Killer       0.03      0.88      0.05        25
                   Whataboutism       0.01      1.00      0.01         2
                        Slogans       0.08      0.32      0.13        28
           Guilt_by_Association       0.02      0.50      0.03         4
            Appeal_to_Hypocrisy       0.01      1.00      0.01         8
                     Repetition       0.14      0.13      0.14       141
                    Flag_Waving       0.11      0.88      0.19        96
          Name_Calling-Labeling       0.25      0.84      0.38       250
      Causal_Oversimplification       0.03      1.



Training loss per 100 training steps: 2.2579851150512695
Training loss per 100 training steps: 1.241610560676839
Training loss per 100 training steps: 1.2132926165167965
Training loss per 100 training steps: 1.2074064212384017
Training loss per 100 training steps: 1.217533703902713
Training loss epoch: 1.2101513555709351
Training accuracy epoch: 0.0
Training eval
tensor(1.0678, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.20      0.81      0.32       518
Obfuscation-Vagueness-Confusion       0.03      0.94      0.06        18
           Appeal_to_Popularity       0.02      0.67      0.05        15
      Exaggeration-Minimisation       0.18      0.59      0.27       466
            Conversation_Killer       0.11      0.53      0.18        91
                   Whataboutism       0.02      0.94      0.04        16
                        Slogans       0.10      0.83      0.18       153
           Guilt_



                                 precision    recall  f1-score   support

                          Doubt       0.22      0.68      0.33       187
Obfuscation-Vagueness-Confusion       0.03      0.31      0.06        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.10      0.54      0.18       115
            Conversation_Killer       0.04      0.88      0.08        25
                   Whataboutism       0.00      1.00      0.01         2
                        Slogans       0.06      0.68      0.10        28
           Guilt_by_Association       0.02      1.00      0.04         4
            Appeal_to_Hypocrisy       0.01      0.75      0.01         8
                     Repetition       0.14      0.35      0.20       141
                    Flag_Waving       0.22      0.76      0.34        96
          Name_Calling-Labeling       0.32      0.78      0.46       250
      Causal_Oversimplification       0.03      0.



Training loss per 100 training steps: 1.7802680730819702
Training loss per 100 training steps: 1.0738414838762567
Training loss per 100 training steps: 1.0655012018051906
Training loss per 100 training steps: 1.0962365865707397
Training loss per 100 training steps: 1.0943491641720038
Training loss epoch: 1.0883979659131233
Training accuracy epoch: 0.0
Training eval
tensor(0.9309, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.26      0.69      0.37       518
Obfuscation-Vagueness-Confusion       0.05      0.89      0.10        18
           Appeal_to_Popularity       0.01      1.00      0.02        15
      Exaggeration-Minimisation       0.21      0.56      0.30       466
            Conversation_Killer       0.07      0.82      0.12        91
                   Whataboutism       0.09      0.94      0.16        16
                        Slogans       0.24      0.56      0.34       153
           Guil

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.27      0.65      0.38       187
Obfuscation-Vagueness-Confusion       0.03      0.23      0.06        13
           Appeal_to_Popularity       0.04      0.12      0.06        34
      Exaggeration-Minimisation       0.13      0.40      0.20       115
            Conversation_Killer       0.03      0.92      0.06        25
                   Whataboutism       0.06      0.50      0.11         2
                        Slogans       0.16      0.32      0.21        28
           Guilt_by_Association       0.02      0.75      0.05         4
            Appeal_to_Hypocrisy       0.03      0.38      0.05         8
                     Repetition       0.17      0.13      0.15       141
                    Flag_Waving       0.18      0.86      0.30        96
          Name_Calling-Labeling       0.35      0.76      0.48       250
      Causal_Oversimplification       0.01      0.



Training loss per 100 training steps: 0.7208813428878784
Training loss per 100 training steps: 0.9750003909120465
Training loss per 100 training steps: 0.9367511325807714
Training loss per 100 training steps: 0.9459503982154238
Training loss per 100 training steps: 0.9516455317821884
Training loss epoch: 0.9551895658386514
Training accuracy epoch: 0.0
Training eval
tensor(0.8239, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.29      0.66      0.41       518
Obfuscation-Vagueness-Confusion       0.04      1.00      0.08        18
           Appeal_to_Popularity       0.04      0.93      0.07        15
      Exaggeration-Minimisation       0.19      0.78      0.30       466
            Conversation_Killer       0.05      0.93      0.09        91
                   Whataboutism       0.02      0.94      0.04        16
                        Slogans       0.10      0.95      0.18       153
           Guil

  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.27      0.57      0.37       187
Obfuscation-Vagueness-Confusion       0.03      0.31      0.06        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.13      0.53      0.20       115
            Conversation_Killer       0.03      1.00      0.06        25
                   Whataboutism       0.01      0.50      0.02         2
                        Slogans       0.06      0.82      0.11        28
           Guilt_by_Association       0.05      0.75      0.10         4
            Appeal_to_Hypocrisy       0.04      0.25      0.07         8
                     Repetition       0.16      0.37      0.23       141
                    Flag_Waving       0.16      0.94      0.27        96
          Name_Calling-Labeling       0.41      0.60      0.49       250
      Causal_Oversimplification       0.03      0.



Training loss per 100 training steps: 0.7957789301872253
Training loss per 100 training steps: 0.8560346347270625
Training loss per 100 training steps: 0.8843014201714625
Training loss per 100 training steps: 0.8802322646312143
Training loss per 100 training steps: 0.8713066170637744
Training loss epoch: 0.870822748858878
Training accuracy epoch: 0.0
Training eval
tensor(0.7876, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.26      0.86      0.39       518
Obfuscation-Vagueness-Confusion       0.07      0.94      0.13        18
           Appeal_to_Popularity       0.08      0.93      0.15        15
      Exaggeration-Minimisation       0.19      0.79      0.30       466
            Conversation_Killer       0.18      0.64      0.28        91
                   Whataboutism       0.02      1.00      0.04        16
                        Slogans       0.19      0.81      0.31       153
           Guilt

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.27      0.72      0.39       187
Obfuscation-Vagueness-Confusion       0.06      0.15      0.09        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.14      0.56      0.22       115
            Conversation_Killer       0.06      0.80      0.11        25
                   Whataboutism       0.01      0.50      0.02         2
                        Slogans       0.12      0.54      0.19        28
           Guilt_by_Association       0.12      0.75      0.21         4
            Appeal_to_Hypocrisy       0.07      0.25      0.11         8
                     Repetition       0.15      0.20      0.17       141
                    Flag_Waving       0.29      0.70      0.41        96
          Name_Calling-Labeling       0.41      0.67      0.51       250
      Causal_Oversimplification       0.05      0.



Training loss per 100 training steps: 0.49536892771720886
Training loss per 100 training steps: 0.8044403334065239
Training loss per 100 training steps: 0.7856552331008722
Training loss per 100 training steps: 0.7892682661249788
Training loss per 100 training steps: 0.8090636888495705
Training loss epoch: 0.8220683833386035
Training accuracy epoch: 0.0
Training eval
tensor(0.7400, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.36      0.59      0.45       518
Obfuscation-Vagueness-Confusion       0.02      1.00      0.03        18
           Appeal_to_Popularity       0.02      1.00      0.03        15
      Exaggeration-Minimisation       0.23      0.68      0.34       466
            Conversation_Killer       0.13      0.80      0.22        91
                   Whataboutism       0.02      1.00      0.03        16
                        Slogans       0.17      0.90      0.29       153
           Gui

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.34      0.43      0.38       187
Obfuscation-Vagueness-Confusion       0.02      0.38      0.04        13
           Appeal_to_Popularity       0.06      0.06      0.06        34
      Exaggeration-Minimisation       0.15      0.38      0.21       115
            Conversation_Killer       0.05      0.80      0.10        25
                   Whataboutism       0.01      0.50      0.02         2
                        Slogans       0.09      0.68      0.16        28
           Guilt_by_Association       0.23      0.75      0.35         4
            Appeal_to_Hypocrisy       0.09      0.38      0.14         8
                     Repetition       0.16      0.38      0.22       141
                    Flag_Waving       0.22      0.89      0.35        96
          Name_Calling-Labeling       0.53      0.53      0.53       250
      Causal_Oversimplification       0.05      0.



Training loss per 100 training steps: 0.5717795491218567
Training loss per 100 training steps: 0.7510000726964214
Training loss per 100 training steps: 0.7462342580159506
Training loss per 100 training steps: 0.7435373370829611
Training loss per 100 training steps: 0.7432090291507226
Training loss epoch: 0.742673124468073
Training accuracy epoch: 0.0
Training eval
tensor(0.6735, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.35      0.65      0.46       518
Obfuscation-Vagueness-Confusion       0.14      1.00      0.25        18
           Appeal_to_Popularity       0.12      1.00      0.22        15
      Exaggeration-Minimisation       0.19      0.81      0.31       466
            Conversation_Killer       0.15      0.81      0.25        91
                   Whataboutism       0.24      1.00      0.39        16
                        Slogans       0.17      0.94      0.29       153
           Guilt

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.32      0.49      0.39       187
Obfuscation-Vagueness-Confusion       0.04      0.23      0.06        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.16      0.51      0.24       115
            Conversation_Killer       0.05      0.88      0.09        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.09      0.68      0.15        28
           Guilt_by_Association       0.12      0.75      0.21         4
            Appeal_to_Hypocrisy       0.03      0.50      0.05         8
                     Repetition       0.14      0.10      0.12       141
                    Flag_Waving       0.17      0.95      0.29        96
          Name_Calling-Labeling       0.38      0.69      0.49       250
      Causal_Oversimplification       0.03      0.



Training loss per 100 training steps: 0.5768918991088867
Training loss per 100 training steps: 0.7134822137875132
Training loss per 100 training steps: 0.7206667999723064
Training loss per 100 training steps: 0.7490404235366175
Training loss per 100 training steps: 0.7502071861911593
Training loss epoch: 0.7482936837571732
Training accuracy epoch: 0.0
Training eval
tensor(0.7138, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.23      0.92      0.36       518
Obfuscation-Vagueness-Confusion       0.05      1.00      0.09        18
           Appeal_to_Popularity       0.04      1.00      0.08        15
      Exaggeration-Minimisation       0.25      0.68      0.36       466
            Conversation_Killer       0.24      0.87      0.37        91
                   Whataboutism       0.01      1.00      0.01        16
                        Slogans       0.22      0.97      0.35       153
           Guil

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.24      0.82      0.37       187
Obfuscation-Vagueness-Confusion       0.03      0.23      0.06        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.17      0.38      0.24       115
            Conversation_Killer       0.05      0.72      0.09        25
                   Whataboutism       0.00      1.00      0.01         2
                        Slogans       0.14      0.43      0.21        28
           Guilt_by_Association       0.11      0.75      0.19         4
            Appeal_to_Hypocrisy       0.09      0.50      0.16         8
                     Repetition       0.16      0.21      0.18       141
                    Flag_Waving       0.26      0.80      0.39        96
          Name_Calling-Labeling       0.54      0.48      0.51       250
      Causal_Oversimplification       0.04      0.



Training loss per 100 training steps: 0.7235702872276306
Training loss per 100 training steps: 0.6859605610370636
Training loss per 100 training steps: 0.6679904248880509
Training loss per 100 training steps: 0.6787575072426336
Training loss per 100 training steps: 0.6811940210715791
Training loss epoch: 0.6790030652538259
Training accuracy epoch: 0.0
Training eval
tensor(0.6379, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.26      0.92      0.40       518
Obfuscation-Vagueness-Confusion       0.23      1.00      0.37        18
           Appeal_to_Popularity       0.31      1.00      0.48        15
      Exaggeration-Minimisation       0.37      0.26      0.31       466
            Conversation_Killer       0.23      0.95      0.37        91
                   Whataboutism       0.27      1.00      0.43        16
                        Slogans       0.40      0.88      0.55       153
           Guil

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.30      0.68      0.42       187
Obfuscation-Vagueness-Confusion       0.11      0.08      0.09        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.15      0.11      0.13       115
            Conversation_Killer       0.03      0.40      0.06        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.20      0.39      0.27        28
           Guilt_by_Association       0.25      0.75      0.38         4
            Appeal_to_Hypocrisy       0.14      0.25      0.18         8
                     Repetition       0.16      0.35      0.22       141
                    Flag_Waving       0.34      0.79      0.47        96
          Name_Calling-Labeling       0.43      0.64      0.51       250
      Causal_Oversimplification       0.04      0.



Training loss per 100 training steps: 0.731935441493988
Training loss per 100 training steps: 0.6279753428874629
Training loss per 100 training steps: 0.6230263652196572
Training loss per 100 training steps: 0.6314582178164955
Training loss per 100 training steps: 0.6281916589065085
Training loss epoch: 0.6277413243943072
Training accuracy epoch: 0.0
Training eval
tensor(0.5399, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.34      0.79      0.47       518
Obfuscation-Vagueness-Confusion       0.15      1.00      0.26        18
           Appeal_to_Popularity       0.24      1.00      0.38        15
      Exaggeration-Minimisation       0.28      0.70      0.40       466
            Conversation_Killer       0.21      1.00      0.34        91
                   Whataboutism       0.33      1.00      0.49        16
                        Slogans       0.28      0.97      0.43       153
           Guilt

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.34      0.48      0.40       187
Obfuscation-Vagueness-Confusion       0.08      0.08      0.08        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.18      0.52      0.27       115
            Conversation_Killer       0.05      0.72      0.09        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.14      0.61      0.23        28
           Guilt_by_Association       0.38      0.75      0.50         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.17      0.21      0.19       141
                    Flag_Waving       0.27      0.80      0.40        96
          Name_Calling-Labeling       0.36      0.78      0.49       250
      Causal_Oversimplification       0.06      0.



Training loss per 100 training steps: 0.5954131484031677
Training loss per 100 training steps: 0.5729564271941043
Training loss per 100 training steps: 0.5604251461539103
Training loss per 100 training steps: 0.5824298891316221
Training loss per 100 training steps: 0.5964436916044525
Training loss epoch: 0.6045565995130133
Training accuracy epoch: 0.0
Training eval
tensor(0.5491, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.32      0.86      0.47       518
Obfuscation-Vagueness-Confusion       0.13      1.00      0.24        18
           Appeal_to_Popularity       0.15      1.00      0.26        15
      Exaggeration-Minimisation       0.19      0.95      0.31       466
            Conversation_Killer       0.07      1.00      0.14        91
                   Whataboutism       0.09      1.00      0.16        16
                        Slogans       0.30      0.99      0.46       153
           Guil

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.30      0.53      0.39       187
Obfuscation-Vagueness-Confusion       0.10      0.08      0.09        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.14      0.80      0.24       115
            Conversation_Killer       0.03      0.88      0.07        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.12      0.39      0.19        28
           Guilt_by_Association       0.27      0.75      0.40         4
            Appeal_to_Hypocrisy       0.10      0.50      0.17         8
                     Repetition       0.13      0.09      0.11       141
                    Flag_Waving       0.24      0.90      0.37        96
          Name_Calling-Labeling       0.49      0.48      0.49       250
      Causal_Oversimplification       0.10      0.



Training loss per 100 training steps: 0.4712960124015808
Training loss per 100 training steps: 0.5943891683427414
Training loss per 100 training steps: 0.5808334779086991
Training loss per 100 training steps: 0.566556506081673
Training loss per 100 training steps: 0.5759853797957785
Training loss epoch: 0.5882298784053072
Training accuracy epoch: 0.0
Training eval
tensor(0.5098, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.36      0.78      0.49       518
Obfuscation-Vagueness-Confusion       0.05      1.00      0.10        18
           Appeal_to_Popularity       0.06      1.00      0.11        15
      Exaggeration-Minimisation       0.22      0.91      0.35       466
            Conversation_Killer       0.15      1.00      0.26        91
                   Whataboutism       0.05      1.00      0.09        16
                        Slogans       0.48      0.98      0.64       153
           Guilt

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.32      0.47      0.38       187
Obfuscation-Vagueness-Confusion       0.03      0.08      0.04        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.15      0.63      0.24       115
            Conversation_Killer       0.05      0.64      0.09        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.24      0.32      0.28        28
           Guilt_by_Association       0.43      0.75      0.55         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.18      0.07      0.10       141
                    Flag_Waving       0.38      0.65      0.48        96
          Name_Calling-Labeling       0.35      0.80      0.48       250
      Causal_Oversimplification       0.09      0.



Training loss per 100 training steps: 0.4119436740875244
Training loss per 100 training steps: 0.5355332366900869
Training loss per 100 training steps: 0.5306439112074932
Training loss per 100 training steps: 0.5341330720142669
Training loss per 100 training steps: 0.5284340502763925
Training loss epoch: 0.5300873765920071
Training accuracy epoch: 0.0
Training eval
tensor(0.4400, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.33      0.92      0.48       518
Obfuscation-Vagueness-Confusion       0.16      1.00      0.27        18
           Appeal_to_Popularity       0.19      1.00      0.31        15
      Exaggeration-Minimisation       0.34      0.72      0.46       466
            Conversation_Killer       0.33      1.00      0.49        91
                   Whataboutism       0.13      1.00      0.23        16
                        Slogans       0.40      0.99      0.56       153
           Guil

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.32      0.49      0.39       187
Obfuscation-Vagueness-Confusion       0.07      0.08      0.07        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.20      0.31      0.24       115
            Conversation_Killer       0.05      0.40      0.08        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.17      0.50      0.26        28
           Guilt_by_Association       0.43      0.75      0.55         4
            Appeal_to_Hypocrisy       0.12      0.25      0.17         8
                     Repetition       0.15      0.35      0.21       141
                    Flag_Waving       0.33      0.77      0.46        96
          Name_Calling-Labeling       0.46      0.58      0.51       250
      Causal_Oversimplification       0.11      0.



Training loss per 100 training steps: 0.34776297211647034
Training loss per 100 training steps: 0.4703644578999812
Training loss per 100 training steps: 0.48209940408592794
Training loss per 100 training steps: 0.4892367086933301
Training loss per 100 training steps: 0.493596450794962
Training loss epoch: 0.4974290684816685
Training accuracy epoch: 0.0
Training eval
tensor(0.4450, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.46      0.82      0.59       518
Obfuscation-Vagueness-Confusion       0.31      1.00      0.47        18
           Appeal_to_Popularity       0.09      1.00      0.16        15
      Exaggeration-Minimisation       0.30      0.85      0.44       466
            Conversation_Killer       0.37      1.00      0.54        91
                   Whataboutism       0.34      1.00      0.51        16
                        Slogans       0.37      1.00      0.54       153
           Gui

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.31      0.41      0.36       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.17      0.49      0.26       115
            Conversation_Killer       0.05      0.40      0.08        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.21      0.43      0.29        28
           Guilt_by_Association       0.30      0.75      0.43         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.15      0.12      0.13       141
                    Flag_Waving       0.25      0.84      0.39        96
          Name_Calling-Labeling       0.51      0.42      0.46       250
      Causal_Oversimplification       0.05      0.



Training loss per 100 training steps: 0.3641529083251953
Training loss per 100 training steps: 0.4652148361253266
Training loss per 100 training steps: 0.4666732656421946
Training loss per 100 training steps: 0.46098403944525607
Training loss per 100 training steps: 0.46555866334503726
Training loss epoch: 0.4724139961790531
Training accuracy epoch: 0.0
Training eval
tensor(0.4308, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.64      0.69      0.66       518
Obfuscation-Vagueness-Confusion       0.12      1.00      0.21        18
           Appeal_to_Popularity       0.08      1.00      0.15        15
      Exaggeration-Minimisation       0.38      0.86      0.53       466
            Conversation_Killer       0.53      0.99      0.69        91
                   Whataboutism       0.10      1.00      0.18        16
                        Slogans       0.21      1.00      0.35       153
           Gu

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.35      0.24      0.28       187
Obfuscation-Vagueness-Confusion       0.07      0.08      0.07        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.19      0.44      0.26       115
            Conversation_Killer       0.03      0.12      0.05        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.12      0.64      0.20        28
           Guilt_by_Association       0.30      0.75      0.43         4
            Appeal_to_Hypocrisy       0.09      0.38      0.14         8
                     Repetition       0.15      0.25      0.19       141
                    Flag_Waving       0.33      0.77      0.47        96
          Name_Calling-Labeling       0.44      0.56      0.49       250
      Causal_Oversimplification       0.13      0.



Training loss per 100 training steps: 0.47289809584617615
Training loss per 100 training steps: 0.43126645802271246
Training loss per 100 training steps: 0.42741616085097567
Training loss per 100 training steps: 0.4324975550372735
Training loss per 100 training steps: 0.4434493164023259
Training loss epoch: 0.44356781944949575
Training accuracy epoch: 0.0
Training eval
tensor(0.3932, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.72      0.68      0.70       518
Obfuscation-Vagueness-Confusion       0.21      1.00      0.35        18
           Appeal_to_Popularity       0.09      1.00      0.17        15
      Exaggeration-Minimisation       0.23      0.98      0.37       466
            Conversation_Killer       0.59      1.00      0.74        91
                   Whataboutism       0.13      1.00      0.23        16
                        Slogans       0.18      1.00      0.31       153
           

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.36      0.20      0.26       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.15      0.67      0.24       115
            Conversation_Killer       0.03      0.08      0.04        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.10      0.54      0.17        28
           Guilt_by_Association       0.27      0.75      0.40         4
            Appeal_to_Hypocrisy       0.05      0.12      0.07         8
                     Repetition       0.16      0.37      0.22       141
                    Flag_Waving       0.31      0.83      0.46        96
          Name_Calling-Labeling       0.39      0.65      0.49       250
      Causal_Oversimplification       0.17      0.



Training loss per 100 training steps: 0.6274340748786926
Training loss per 100 training steps: 0.4236298634274171
Training loss per 100 training steps: 0.41582480376929193
Training loss per 100 training steps: 0.41437868853146054
Training loss per 100 training steps: 0.4131655139370155
Training loss epoch: 0.41760431065204295
Training accuracy epoch: 0.0
Training eval
tensor(0.3446, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.58      0.93      0.72       518
Obfuscation-Vagueness-Confusion       0.12      1.00      0.22        18
           Appeal_to_Popularity       0.10      1.00      0.18        15
      Exaggeration-Minimisation       0.49      0.89      0.64       466
            Conversation_Killer       0.30      1.00      0.46        91
                   Whataboutism       0.10      1.00      0.18        16
                        Slogans       0.38      1.00      0.55       153
           G

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.29      0.27      0.28       187
Obfuscation-Vagueness-Confusion       0.06      0.08      0.07        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.18      0.30      0.23       115
            Conversation_Killer       0.04      0.44      0.07        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.19      0.39      0.26        28
           Guilt_by_Association       0.30      0.75      0.43         4
            Appeal_to_Hypocrisy       0.09      0.12      0.11         8
                     Repetition       0.19      0.07      0.10       141
                    Flag_Waving       0.36      0.72      0.48        96
          Name_Calling-Labeling       0.44      0.49      0.47       250
      Causal_Oversimplification       0.08      0.



Training loss per 100 training steps: 0.5391333103179932
Training loss per 100 training steps: 0.3868803802398172
Training loss per 100 training steps: 0.3889057253723714
Training loss per 100 training steps: 0.38600445768959896
Training loss per 100 training steps: 0.3858913243039885
Training loss epoch: 0.3878484165731897
Training accuracy epoch: 0.0
Training eval
tensor(0.3151, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.37      0.99      0.53       518
Obfuscation-Vagueness-Confusion       0.19      1.00      0.32        18
           Appeal_to_Popularity       0.19      1.00      0.32        15
      Exaggeration-Minimisation       0.36      0.98      0.53       466
            Conversation_Killer       0.55      1.00      0.71        91
                   Whataboutism       0.13      1.00      0.23        16
                        Slogans       0.74      1.00      0.85       153
           Gui

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.24      0.57      0.34       187
Obfuscation-Vagueness-Confusion       0.33      0.08      0.12        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.17      0.45      0.25       115
            Conversation_Killer       0.02      0.04      0.02        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.32      0.21      0.26        28
           Guilt_by_Association       0.38      0.75      0.50         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.16      0.13      0.14       141
                    Flag_Waving       0.38      0.60      0.47        96
          Name_Calling-Labeling       0.47      0.50      0.48       250
      Causal_Oversimplification       0.08      0.



Training loss per 100 training steps: 0.33514276146888733
Training loss per 100 training steps: 0.35075370760837404
Training loss per 100 training steps: 0.3580649025108091
Training loss per 100 training steps: 0.36445485055446625
Training loss per 100 training steps: 0.37134043151452356
Training loss epoch: 0.3703593054984478
Training accuracy epoch: 0.0
Training eval
tensor(0.2859, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.53      0.99      0.69       518
Obfuscation-Vagueness-Confusion       0.28      1.00      0.44        18
           Appeal_to_Popularity       0.22      1.00      0.36        15
      Exaggeration-Minimisation       0.62      0.95      0.75       466
            Conversation_Killer       0.54      1.00      0.70        91
                   Whataboutism       0.15      1.00      0.26        16
                        Slogans       0.41      1.00      0.58       153
           

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.24      0.35      0.29       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.18      0.29      0.22       115
            Conversation_Killer       0.02      0.04      0.02        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.20      0.36      0.26        28
           Guilt_by_Association       0.20      0.25      0.22         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.14      0.06      0.09       141
                    Flag_Waving       0.36      0.64      0.46        96
          Name_Calling-Labeling       0.43      0.48      0.45       250
      Causal_Oversimplification       0.11      0.



Training loss per 100 training steps: 0.221308633685112
Training loss per 100 training steps: 0.3703864976616189
Training loss per 100 training steps: 0.3616649164489253
Training loss per 100 training steps: 0.35522418813253953
Training loss per 100 training steps: 0.3504918523857421
Training loss epoch: 0.35310604286954755
Training accuracy epoch: 0.0
Training eval
tensor(0.2997, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.54      0.99      0.70       518
Obfuscation-Vagueness-Confusion       0.11      1.00      0.20        18
           Appeal_to_Popularity       0.04      1.00      0.08        15
      Exaggeration-Minimisation       0.55      0.98      0.71       466
            Conversation_Killer       0.58      1.00      0.74        91
                   Whataboutism       0.09      1.00      0.17        16
                        Slogans       0.76      1.00      0.86       153
           Gui

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.25      0.33      0.28       187
Obfuscation-Vagueness-Confusion       0.17      0.08      0.11        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.17      0.24      0.20       115
            Conversation_Killer       0.07      0.16      0.09        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.24      0.14      0.18        28
           Guilt_by_Association       0.33      0.50      0.40         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.14      0.25      0.18       141
                    Flag_Waving       0.45      0.41      0.43        96
          Name_Calling-Labeling       0.46      0.45      0.45       250
      Causal_Oversimplification       0.07      0.



Training loss per 100 training steps: 0.49346035718917847
Training loss per 100 training steps: 0.3412521574461814
Training loss per 100 training steps: 0.335312513421424
Training loss per 100 training steps: 0.3335048697418549
Training loss per 100 training steps: 0.3360574404126094
Training loss epoch: 0.33908424570839457
Training accuracy epoch: 0.0
Training eval
tensor(0.2735, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.64      0.99      0.78       518
Obfuscation-Vagueness-Confusion       0.30      1.00      0.46        18
           Appeal_to_Popularity       0.22      1.00      0.36        15
      Exaggeration-Minimisation       0.69      0.98      0.81       466
            Conversation_Killer       0.63      0.99      0.77        91
                   Whataboutism       0.28      1.00      0.44        16
                        Slogans       0.65      1.00      0.79       153
           Gui

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.26      0.32      0.29       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.21      0.26      0.23       115
            Conversation_Killer       0.00      0.00      0.00        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.30      0.25      0.27        28
           Guilt_by_Association       0.00      0.00      0.00         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.18      0.07      0.10       141
                    Flag_Waving       0.40      0.59      0.48        96
          Name_Calling-Labeling       0.28      0.86      0.42       250
      Causal_Oversimplification       0.06      0.



Training loss per 100 training steps: 0.27646204829216003
Training loss per 100 training steps: 0.3099773387507637
Training loss per 100 training steps: 0.31530229569371065
Training loss per 100 training steps: 0.3148878131593977
Training loss per 100 training steps: 0.315698267627238
Training loss epoch: 0.3183232286192001
Training accuracy epoch: 0.0
Training eval
tensor(0.2578, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.72      0.99      0.83       518
Obfuscation-Vagueness-Confusion       0.32      1.00      0.48        18
           Appeal_to_Popularity       0.33      1.00      0.49        15
      Exaggeration-Minimisation       0.38      1.00      0.55       466
            Conversation_Killer       0.66      1.00      0.79        91
                   Whataboutism       0.26      1.00      0.42        16
                        Slogans       0.78      1.00      0.88       153
           Gui

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.24      0.33      0.28       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.19      0.40      0.26       115
            Conversation_Killer       0.00      0.00      0.00        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.29      0.07      0.11        28
           Guilt_by_Association       0.00      0.00      0.00         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.14      0.06      0.09       141
                    Flag_Waving       0.38      0.49      0.43        96
          Name_Calling-Labeling       0.45      0.45      0.45       250
      Causal_Oversimplification       0.00      0.



Training loss per 100 training steps: 0.22834736108779907
Training loss per 100 training steps: 0.30659859870920086
Training loss per 100 training steps: 0.30863931856641724
Training loss per 100 training steps: 0.31103022035174194
Training loss per 100 training steps: 0.3137188254300496
Training loss epoch: 0.3202018030780427
Training accuracy epoch: 0.0
Training eval
tensor(0.2297, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.62      1.00      0.77       518
Obfuscation-Vagueness-Confusion       0.23      1.00      0.37        18
           Appeal_to_Popularity       0.21      1.00      0.35        15
      Exaggeration-Minimisation       0.64      1.00      0.78       466
            Conversation_Killer       0.62      1.00      0.77        91
                   Whataboutism       0.12      1.00      0.22        16
                        Slogans       0.61      1.00      0.76       153
           

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.23      0.32      0.26       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.17      0.26      0.21       115
            Conversation_Killer       0.03      0.04      0.04        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.24      0.25      0.25        28
           Guilt_by_Association       0.43      0.75      0.55         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.15      0.07      0.10       141
                    Flag_Waving       0.41      0.36      0.39        96
          Name_Calling-Labeling       0.38      0.62      0.47       250
      Causal_Oversimplification       0.06      0.



Training loss per 100 training steps: 0.2044469565153122
Training loss per 100 training steps: 0.2631976046774647
Training loss per 100 training steps: 0.27421060026581606
Training loss per 100 training steps: 0.28218361789800006
Training loss per 100 training steps: 0.2924254211107097
Training loss epoch: 0.29846889566867907
Training accuracy epoch: 0.0
Training eval
tensor(0.2280, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.94      0.92      0.93       518
Obfuscation-Vagueness-Confusion       0.23      1.00      0.37        18
           Appeal_to_Popularity       0.18      1.00      0.31        15
      Exaggeration-Minimisation       0.62      1.00      0.77       466
            Conversation_Killer       0.53      1.00      0.69        91
                   Whataboutism       0.13      1.00      0.23        16
                        Slogans       0.46      1.00      0.63       153
           G

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.39      0.12      0.18       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.23      0.28      0.25       115
            Conversation_Killer       0.00      0.00      0.00        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.18      0.36      0.24        28
           Guilt_by_Association       0.50      0.75      0.60         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.13      0.09      0.10       141
                    Flag_Waving       0.44      0.38      0.41        96
          Name_Calling-Labeling       0.41      0.49      0.45       250
      Causal_Oversimplification       0.03      0.



Training loss per 100 training steps: 0.2860342860221863
Training loss per 100 training steps: 0.2573862581855
Training loss per 100 training steps: 0.2644024460173365
Training loss per 100 training steps: 0.26750332166586205
Training loss per 100 training steps: 0.2710269507847522
Training loss epoch: 0.27633748802732916
Training accuracy epoch: 0.0
Training eval
tensor(0.2114, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.90      1.00      0.95       518
Obfuscation-Vagueness-Confusion       0.08      1.00      0.14        18
           Appeal_to_Popularity       0.12      1.00      0.22        15
      Exaggeration-Minimisation       0.61      1.00      0.76       466
            Conversation_Killer       0.48      1.00      0.65        91
                   Whataboutism       0.08      1.00      0.16        16
                        Slogans       0.48      1.00      0.64       153
           Guilt

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.37      0.19      0.25       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.20      0.26      0.23       115
            Conversation_Killer       0.02      0.04      0.03        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.23      0.36      0.28        28
           Guilt_by_Association       0.25      0.25      0.25         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.13      0.06      0.09       141
                    Flag_Waving       0.41      0.43      0.42        96
          Name_Calling-Labeling       0.40      0.51      0.45       250
      Causal_Oversimplification       0.05      0.



Training loss per 100 training steps: 0.4425853490829468
Training loss per 100 training steps: 0.25821549482274764
Training loss per 100 training steps: 0.2702782828712938
Training loss per 100 training steps: 0.26681930465357645
Training loss per 100 training steps: 0.2658219900809024
Training loss epoch: 0.2707360078679754
Training accuracy epoch: 0.0
Training eval
tensor(0.2112, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.86      1.00      0.92       518
Obfuscation-Vagueness-Confusion       0.23      1.00      0.38        18
           Appeal_to_Popularity       0.16      1.00      0.28        15
      Exaggeration-Minimisation       0.62      1.00      0.76       466
            Conversation_Killer       0.68      0.98      0.80        91
                   Whataboutism       0.18      1.00      0.31        16
                        Slogans       0.34      1.00      0.50       153
           Gu

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.28      0.24      0.25       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.20      0.23      0.21       115
            Conversation_Killer       0.00      0.00      0.00        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.18      0.36      0.24        28
           Guilt_by_Association       0.20      0.25      0.22         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.11      0.04      0.06       141
                    Flag_Waving       0.46      0.35      0.40        96
          Name_Calling-Labeling       0.38      0.62      0.47       250
      Causal_Oversimplification       0.00      0.



Training loss per 100 training steps: 0.21347826719284058
Training loss per 100 training steps: 0.250872969996221
Training loss per 100 training steps: 0.2621333680268544
Training loss per 100 training steps: 0.2577254563025462
Training loss per 100 training steps: 0.26117084968714344
Training loss epoch: 0.2686115613326113
Training accuracy epoch: 0.0
Training eval
tensor(0.2096, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.78      1.00      0.87       518
Obfuscation-Vagueness-Confusion       0.25      1.00      0.40        18
           Appeal_to_Popularity       0.23      1.00      0.38        15
      Exaggeration-Minimisation       0.90      0.99      0.94       466
            Conversation_Killer       0.62      1.00      0.76        91
                   Whataboutism       0.18      1.00      0.31        16
                        Slogans       0.76      0.99      0.86       153
           Gui

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.25      0.31      0.28       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.16      0.06      0.09       115
            Conversation_Killer       0.00      0.00      0.00        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.33      0.04      0.06        28
           Guilt_by_Association       0.00      0.00      0.00         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.12      0.06      0.08       141
                    Flag_Waving       0.48      0.32      0.39        96
          Name_Calling-Labeling       0.54      0.19      0.28       250
      Causal_Oversimplification       0.05      0.



Training loss per 100 training steps: 0.30876579880714417
Training loss per 100 training steps: 0.25641207824839224
Training loss per 100 training steps: 0.24302255440114148
Training loss per 100 training steps: 0.24157288071523078
Training loss per 100 training steps: 0.24803455619889306
Training loss epoch: 0.25543137199067056
Training accuracy epoch: 0.0
Training eval
tensor(0.2004, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.89      1.00      0.94       518
Obfuscation-Vagueness-Confusion       0.31      1.00      0.47        18
           Appeal_to_Popularity       0.26      1.00      0.41        15
      Exaggeration-Minimisation       0.95      1.00      0.97       466
            Conversation_Killer       0.70      1.00      0.82        91
                   Whataboutism       0.33      1.00      0.50        16
                        Slogans       0.66      1.00      0.79       153
         

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.29      0.20      0.24       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.14      0.04      0.07       115
            Conversation_Killer       0.00      0.00      0.00        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.19      0.11      0.14        28
           Guilt_by_Association       0.00      0.00      0.00         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.11      0.06      0.08       141
                    Flag_Waving       0.47      0.29      0.36        96
          Name_Calling-Labeling       0.45      0.33      0.38       250
      Causal_Oversimplification       0.00      0.



Training loss per 100 training steps: 0.15931765735149384
Training loss per 100 training steps: 0.26149859301524586
Training loss per 100 training steps: 0.25917483191585067
Training loss per 100 training steps: 0.2501049651160985
Training loss per 100 training steps: 0.24709834689808605
Training loss epoch: 0.24618107063022066
Training accuracy epoch: 0.0
Training eval
tensor(0.1746, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.84      1.00      0.91       518
Obfuscation-Vagueness-Confusion       0.29      1.00      0.44        18
           Appeal_to_Popularity       0.26      1.00      0.41        15
      Exaggeration-Minimisation       0.88      1.00      0.94       466
            Conversation_Killer       0.59      1.00      0.74        91
                   Whataboutism       0.22      1.00      0.36        16
                        Slogans       0.80      1.00      0.89       153
          

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.26      0.27      0.26       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.19      0.16      0.17       115
            Conversation_Killer       0.03      0.04      0.03        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.18      0.07      0.10        28
           Guilt_by_Association       0.00      0.00      0.00         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.12      0.03      0.05       141
                    Flag_Waving       0.44      0.38      0.40        96
          Name_Calling-Labeling       0.49      0.22      0.30       250
      Causal_Oversimplification       0.00      0.



Training loss per 100 training steps: 0.1594698280096054
Training loss per 100 training steps: 0.23653937322963584
Training loss per 100 training steps: 0.2501224999997153
Training loss per 100 training steps: 0.25108122486509754
Training loss per 100 training steps: 0.24714540654881637
Training loss epoch: 0.247159979920438
Training accuracy epoch: 0.0
Training eval
tensor(0.1813, device='cuda:2')
                                 precision    recall  f1-score   support

                          Doubt       0.86      1.00      0.93       518
Obfuscation-Vagueness-Confusion       0.16      1.00      0.27        18
           Appeal_to_Popularity       0.17      1.00      0.30        15
      Exaggeration-Minimisation       0.62      1.00      0.77       466
            Conversation_Killer       0.61      1.00      0.76        91
                   Whataboutism       0.14      1.00      0.24        16
                        Slogans       0.60      1.00      0.75       153
           Gu

  _warn_prf(average, modifier, msg_start, len(result))


                                 precision    recall  f1-score   support

                          Doubt       0.26      0.23      0.25       187
Obfuscation-Vagueness-Confusion       0.00      0.00      0.00        13
           Appeal_to_Popularity       0.00      0.00      0.00        34
      Exaggeration-Minimisation       0.20      0.30      0.24       115
            Conversation_Killer       0.00      0.00      0.00        25
                   Whataboutism       0.00      0.00      0.00         2
                        Slogans       0.21      0.29      0.24        28
           Guilt_by_Association       0.50      0.25      0.33         4
            Appeal_to_Hypocrisy       0.00      0.00      0.00         8
                     Repetition       0.12      0.06      0.08       141
                    Flag_Waving       0.41      0.40      0.40        96
          Name_Calling-Labeling       0.39      0.57      0.47       250
      Causal_Oversimplification       0.00      0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
train_loss_history=[t.item() for t in train_loss_history]
test_loss_history=[t.item() for t in test_loss_history]

In [None]:
import matplotlib.pyplot as plt

plt.plot(train_loss_history, "--")
plt.plot(test_loss_history)

In [None]:
import matplotlib.pyplot as plt

plt.plot(train_f1micro_history, "o")
plt.plot(test_f1micro_history)