In [1]:
from google.colab import drive
drive.mount('/content/drive')
%cd '/content/drive/MyDrive/Year 3/Term 2/ANLE/Investigation'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Year 3/Term 2/ANLE/Investigation


In [2]:
#Load dataset (Taken from week 7 lab)
import pandas as pd

train_path = "./propaganda_dataset_v2/propaganda_train.tsv"
val_path = "./propaganda_dataset_v2/propaganda_val.tsv"

train_df = pd.read_csv(train_path, delimiter="\t", quotechar='|')
train_df.head(20)

Unnamed: 0,label,tagged_in_context
0,not_propaganda,"No, <BOS> he <EOS> will not be confirmed."
1,not_propaganda,This declassification effort <BOS> won’t make ...
2,flag_waving,"""The Obama administration misled the <BOS> Ame..."
3,not_propaganda,“It looks like we’re capturing the demise of t...
4,not_propaganda,"<BOS> Location: Westerville, Ohio <EOS>"
5,loaded_language,"Hitler <BOS> annihilated <EOS> 400,000 Germans..."
6,not_propaganda,A federal judge on Monday ordered U.S. immigra...
7,not_propaganda,<BOS> Kirstjen Nielsen (@SecNielsen) <EOS> Nov...
8,doubt,"As noted above, at this point literally every ..."
9,not_propaganda,Britain doesn't need more hate even just for a...


In [3]:
val_df = pd.read_csv(val_path, delimiter="\t", quotechar='|')
val_df.head(20)

Unnamed: 0,label,tagged_in_context
0,not_propaganda,"On average, between 300 and 600 infections are..."
1,causal_oversimplification,Mostly because <BOS> the country would not las...
2,appeal_to_fear_prejudice,Lyndon Johnson <BOS> gets Earl Warren and Sen....
3,not_propaganda,<BOS> You <EOS> may opt out at anytime.
4,repetition,It must be exacted from him directly in order ...
5,"name_calling,labeling",Is it any wonder that priests and laity alike ...
6,loaded_language,Health workers have been asked to work with co...
7,not_propaganda,The Best of <BOS> Jacob <EOS> G. Hornberger
8,flag_waving,Trump began his remarks by setting out <BOS> I...
9,doubt,"<BOS> Now, the pope’s reply to my testimony wa..."


#Naive Bayes for sentence level classification

In [5]:
#get binary labels
def get_bin_labels(df):
  labels = []
  for ind,row in df.iterrows():
    if row['label'] == 'not_propaganda':
      labels.append(0)
    else:
      labels.append(1)
  return labels

train_bin_labels = get_bin_labels(train_df)
val_bin_labels = get_bin_labels(val_df)

In [13]:
#get sentences
def get_sents(df):
  sents = []
  for sentence in df["tagged_in_context"]:
    sents.append(sentence)
  return sents

train_sents = get_sents(train_df)
val_sents = get_sents(val_df)

In [None]:
train_tokens1 = [sent.lower().split() for sent in train_sents]
val_tokens1 = [sent.lower().split() for sent in val_sents]

def remove_tags(toks):
  for i, sent in enumerate(toks):
    for t in sent:
      if '<bos>' in t or '<eos>' in t:
        sent.remove(t)
    toks[i] = sent
  return toks

train_tokens1 = remove_tags(train_tokens1)
val_tokens1 = remove_tags(val_tokens1)

In [19]:
import numpy as np

class Vectorizer():

    def __init__(self, corpus=None):
        self.word_set = {}
        if corpus:
            self.fit(corpus)

    def fit(self, corpus): #learns vocabulary of given corpus
        ws = self.word_set
        for d in corpus: #iterates through corpus
            for t in d: #iterates through current doc
                if t not in ws: #checks if word has not been seen
                    ws[t] = len(ws) #if word not seen, add it to word set
        self.word_set = ws

    def transform(self, doc): #returns feature vector for given document based on learned vocabulary
        vec = np.zeros([len(self.word_set)], dtype=np.short) #generates vector of zeroes the same length as learned vocabulary
        for t in doc:
            if t in self.word_set:
                vec[self.word_set[t]] += 1 #for every instance of a known word, add 1 to corresponding position in vector
        return(vec)

In [None]:
V1 = Vectorizer()
V1.fit(train_tokens1)

train_vecs1 = np.array([V1.transform(x) for x in train_tokens1])
val_vecs1 = np.array([V1.transform(x) for x in val_tokens1])

In [4]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.naive_bayes import MultinomialNB

In [11]:
def get_bin_metrics(preds, y):
  return accuracy_score(y, preds), f1_score(y, preds), precision_score(y, preds), recall_score(y, preds)

In [12]:
classifier = MultinomialNB()
classifier.fit(train_vecs1, train_bin_labels)

preds1 = classifier.predict(val_vecs1)
print(get_bin_metrics(preds1, val_bin_labels))

(0.678125, 0.7238605898123323, 0.6178489702517163, 0.8737864077669902)


In [28]:
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords, wordnet
import nltk
import string
import pickle
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [29]:
def remove_punct_stop(sents): #remove punct and stopwords from tokens
  tokenized = []

  for i, s in enumerate(sents):
    for j, t in enumerate(s):
      for c in t:
        if c in string.punctuation:
          t = t.replace(c, "")
      s[j] = t
      if t in set(stopwords.words('english')): #remove propaganda snippet tags and stopwords
        s.remove(t)

    sents[i] = s

  return sents

In [None]:
train_tokens2 = remove_punct_stop(train_tokens1)
val_tokens2 = remove_punct_stop(val_tokens1)

In [21]:
V2 = Vectorizer(train_tokens2)
train_vecs2 = [V2.transform(x) for x in train_tokens2]
val_vecs2 = [V2.transform(x) for x in val_tokens2]

In [22]:
classifier2 = MultinomialNB() #fit classifier to training data
classifier2.fit(train_vecs2, train_bin_labels)

In [24]:
preds2 = classifier2.predict(val_vecs2)

print(get_bin_metrics(preds2, val_bin_labels))

(0.6875, 0.7134670487106016, 0.6401028277634961, 0.8058252427184466)


In [25]:
import re

def tokenize_snips(sents):
  tokenized = []

  for s in sents:
    result = re.search('<BOS>(.*)<EOS>', s).group(1) #finds snippet
    tokens = result.lower().split()
    tokenized.append(tokens)

  tokenized = remove_punct_stop(tokenized)

  return tokenized

train_tokens3 = tokenize_snips(train_sents)

In [26]:
V3 = Vectorizer(train_tokens3)

train_vecs3 = np.array([V3.transform(x) for x in train_tokens2])
val_vecs3 = np.array([V3.transform(x) for x in val_tokens2])

In [27]:
classifier3 = MultinomialNB()

classifier3.fit(train_vecs3, train_bin_labels)
preds3 = classifier3.predict(val_vecs3)
print(get_bin_metrics(preds3, val_bin_labels))

(0.6828125, 0.7070707070707072, 0.6380208333333334, 0.7928802588996764)


In [34]:
class TFIDF_calc(): #class for generating TF-IDF vectors

    def __init__(self, corpus):
        self.wordset = {}
        self.total_docs = len(corpus)
        self.fit(corpus)

    def fit(self, corpus):
        ws = self.wordset
        for d in corpus:
            dws = set()         #create set to hold unique words present in document
            for t in d:
                if t not in dws:
                    dws.add(t)
            for w in dws:
                if w not in ws:
                    ws[w] = [len(ws), 1] #if word unseen, add to corpus word set
                else:
                    ws[w][1] += 1 #if word already seen, increase num of documents it has been seen in by 1
        for w in ws:
            ws[w][1] = np.log(len(corpus)/ws[w][1]) #convert doc frequency to idf values
        self.wordset = ws

    def transform(self, doc):
        vec = np.zeros((len(self.wordset)), dtype='float')
        dws = {}
        for w in doc:
            if w in dws:
                dws[w] += 1
            else:
                dws[w] = 1
        for w in dws:
            tf = dws[w]/len(doc) #term frequency (tf) calculation
            if w in self.wordset:
                vec[self.wordset[w][0]] = tf*self.wordset[w][1]
        return vec

In [29]:
TV = TFIDF_calc(train_tokens2)

train_vecs4 = np.array([TV.transform(x) for x in train_tokens2])
val_vecs4 = np.array([TV.transform(x) for x in val_tokens2])

classifier4 = MultinomialNB()
classifier4.fit(train_vecs4, train_bin_labels)

preds4 = classifier4.predict(val_vecs4)

print(get_bin_metrics(preds4, val_bin_labels))

(0.69375, 0.7134502923976608, 0.6506666666666666, 0.7896440129449838)


#BERT for sentence classification

In [6]:
%cd /content

/content


In [7]:
!pip install accelerate -U



In [8]:
#### BERT CELLS MOSTLY TAKEN FROM https://thepythoncode.com/article/finetuning-bert-using-huggingface-transformers-python

from transformers import BertTokenizerFast, BertForSequenceClassification
from transformers import Trainer, TrainingArguments
from transformers.file_utils import is_tf_available, is_torch_available
import torch
import numpy as np

In [35]:
bert_v = "bert-base-uncased"

tokenizer = BertTokenizerFast.from_pretrained(bert_v, do_lower_case=True)

bin_bert = BertForSequenceClassification.from_pretrained(bert_v, num_labels=2).to("cuda")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
import random
def set_seed(seed: int): #function to ensure reproducable behaviour
    random.seed(seed)
    np.random.seed(seed)
    if is_torch_available():
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    if is_tf_available():
        import tensorflow as tf
        tf.random.set_seed(seed)

set_seed(1)

In [None]:
#find longest sentence
longest = -1
for sent in train_tokens1:
  lon = len(sent)
  if longest < lon:
    longest = lon

In [None]:
def remove_tags(sents):
  for i, s in enumerate(sents):
    s = s.replace('<BOS>', "")
    s = s.replace('<EOS>', "")
    sents[i] = s
  return sents

train_sents = remove_tags(train_sents)
val_sents = remove_tags(val_sents)

In [None]:
train_tokens_bert = tokenizer(train_sents, truncation=True, padding=True, max_length=220)
val_tokens_bert = tokenizer(val_sents, truncation=True, padding=True, max_length=220)

In [10]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item["labels"] = torch.tensor([self.labels[idx]])
        return item

    def __len__(self):
        return len(self.labels)

In [None]:
train_dataset_bin = Dataset(train_tokens_bert, train_bin_labels)
val_dataset_bin = Dataset(val_tokens_bert, val_bin_labels)

In [None]:
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    weight_decay=0.01,
    logging_dir='./logs',
    load_best_model_at_end=True,
    logging_steps=60,
    save_steps=60,
    evaluation_strategy="steps",
)



In [21]:
from sklearn.metrics import accuracy_score, f1_score
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average='macro')
    return {
        'accuracy' : acc,
    }

In [None]:
trainer = Trainer(
    model=bin_bert,
    args=training_args,
    train_dataset=train_dataset_bin,
    eval_dataset=val_dataset_bin,
    compute_metrics=compute_metrics
)

In [None]:
###OUTPUTS WITH 64 BATCH SIZE, 3 EPOCHS
outputs = trainer.predict(val_dataset_bin)
preds_binbert = [pred.argmax(-1) for pred in outputs.predictions]
print(get_bin_metrics(preds_binbert, val_bin_labels))

(0.85, 0.8350515463917526, 0.8901098901098901, 0.7864077669902912)


In [None]:
#DO NOT REMOVE, TRAINING LOG WITH 32 BATCH SIZE 3 EPOCHS
#trainer.train()

Step,Training Loss,Validation Loss,Accuracy
60,0.2773,0.275316,0.878125
120,0.195,0.192973,0.940625
180,0.0717,0.226782,0.935937
240,0.0485,0.189649,0.948438


TrainOutput(global_step=240, training_loss=0.14810688495635987, metrics={'train_runtime': 264.1608, 'train_samples_per_second': 29.073, 'train_steps_per_second': 0.909, 'total_flos': 722239846963200.0, 'train_loss': 0.14810688495635987, 'epoch': 3.0})

In [None]:
###OUTPUTS WITH 32 BATCH SIZE, 3 EPOCHS
outputs = trainer.predict(val_dataset_bin)
preds_binbert = [pred.argmax(-1) for pred in outputs.predictions]
print(get_bin_metrics(preds_binbert, val_bin_labels))

(0.9484375, 0.9468599033816425, 0.9423076923076923, 0.9514563106796117)


In [None]:
#LOGS WITH 32 BATCH SIZE, 10 EPOCHS
#trainer.train()

Step,Training Loss,Validation Loss,Accuracy
60,0.4097,0.215257,0.9125
120,0.1613,0.1584,0.935937
180,0.0817,0.317871,0.934375
240,0.0575,0.251256,0.93125
300,0.0203,0.353328,0.929688
360,0.0024,0.288629,0.954688
420,0.0102,0.4124,0.9375
480,0.0079,0.3481,0.945312
540,0.0005,0.367869,0.946875
600,0.0003,0.382369,0.942187


TrainOutput(global_step=800, training_loss=0.05662302584387362, metrics={'train_runtime': 1057.0244, 'train_samples_per_second': 24.219, 'train_steps_per_second': 0.757, 'total_flos': 2407466156544000.0, 'train_loss': 0.05662302584387362, 'epoch': 10.0})

In [None]:
###OUTPUTS WITH 32 BATCH SIZE, 10 EPOCHS
outputs = trainer.predict(val_dataset_bin)
preds_binbert = [pred.argmax(-1) for pred in outputs.predictions]
print(get_bin_metrics(preds_binbert, val_bin_labels))

(0.9359375, 0.9337641357027464, 0.932258064516129, 0.9352750809061489)


In [None]:
#LOGS FOR 16 BATCH SIZE, 5 EPOCHS
#trainer.train()

Step,Training Loss,Validation Loss,Accuracy
60,0.1574,0.338036,0.920312
120,0.1269,0.263896,0.926562
180,0.0861,0.330745,0.9375
240,0.0734,0.33862,0.939063
300,0.1224,0.244833,0.942187
360,0.0392,0.35161,0.934375
420,0.0375,0.255032,0.95625
480,0.0153,0.241805,0.957812
540,0.0021,0.274536,0.946875
600,0.0141,0.286277,0.95


TrainOutput(global_step=800, training_loss=0.05141619648085907, metrics={'train_runtime': 600.6784, 'train_samples_per_second': 21.309, 'train_steps_per_second': 1.332, 'total_flos': 1203733078272000.0, 'train_loss': 0.05141619648085907, 'epoch': 5.0})

In [None]:
###OUTPUTS FOR BATCH SIZE 16, 5 EPOCHS
outputs = trainer.predict(val_dataset_bin)
preds_binbert = [pred.argmax(-1) for pred in outputs.predictions]
print(get_bin_metrics(preds_binbert, val_bin_labels))

(0.9578125, 0.9565217391304348, 0.9519230769230769, 0.9611650485436893)


#Naive Bayes Task 2

In [14]:
label_map = {
      'appeal_to_fear_prejudice' : 0,
      'causal_oversimplification' : 1,
      'doubt' : 2,
      'exaggeration,minimisation' : 3,
      'flag_waving' : 4,
      'loaded_language' : 5,
      'name_calling,labeling' : 6,
      'repetition' : 7,
  }

In [15]:
import re

def get_snips(df):
  snips = []
  labs = []
  for ind, row in df.iterrows():
    if row['label'] != 'not_propaganda':
      labs.append(label_map[row['label']])
      sent = row['tagged_in_context']
      snips.append(re.search('<BOS>(.*)<EOS>', sent).group(1))
  return snips, labs

train_snips, train_snip_labs = get_snips(train_df)
val_snips, val_snip_labs = get_snips(val_df)

In [None]:
print(re.search('<BOS>(.*)<EOS>', train_sents).group(1))

In [17]:
train_tokens1 = [s.lower().split() for s in train_snips]
val_tokens1 = [s.lower().split() for s in val_snips]

In [20]:
V1 = Vectorizer(train_tokens1)
train_vecs1 = [V1.transform(x) for x in train_tokens1]
val_vecs1 = [V1.transform(x) for x in val_tokens1]

In [16]:
new_map = {j:i for i,j in label_map.items()}
def get_snip_metrics(preds, y):
  return accuracy_score(y, preds), f1_score(y, preds, average='macro'), precision_score(y, preds, average='macro'), recall_score(y, preds, average='macro')

def get_class_metrics(preds, y):
    val_pns = [[0,0,0] for i in range(8)]# 0: TP, 1: FP, 2: FN

    for i in range(len(preds)):
        if preds[i] == y[i]:
            val_pns[preds[i]][0] += 1 #increase TP count on predicted/true class
        else:
            val_pns[y[i]][2] += 1 #increase FN count on true class
            val_pns[preds[i]][1] += 1 #increase FP count on predicted class

    pre_re = []
    for c in val_pns:   #calculates precision recall and f1 for each class
        pre = c[0]/(c[0]+c[1])
        re = c[0]/(c[0]+c[2])
        f1 = 2*((pre*re)/(pre+re))
        pre_re.append([pre, re, f1])

    return pre_re

def show_class_metrics(pre_re):
  for i, pre in enumerate(pre_re):
    print(f'{new_map[i]}: Precision = {pre[0]:.3f}, Recall = {pre[1]:.3f}, F1-Score = {pre[2]:.3f}')


In [24]:
classifier1 = MultinomialNB()
classifier1.fit(train_vecs1, train_snip_labs)
preds1 = classifier1.predict(val_vecs1)

In [26]:
print(get_snip_metrics(preds1, val_snip_labs))
show_class_metrics(get_class_metrics(preds1, val_snip_labs))

(0.3948220064724919, 0.3780437205754118, 0.5223957829804604, 0.39233571708349413)
appeal_to_fear_prejudice: Precision = 0.375, Recall = 0.419, F1-Score = 0.396
causal_oversimplification: Precision = 0.301, Recall = 0.800, F1-Score = 0.438
doubt: Precision = 0.308, Recall = 0.465, F1-Score = 0.370
exaggeration,minimisation: Precision = 0.262, Recall = 0.367, F1-Score = 0.306
flag_waving: Precision = 0.742, Recall = 0.511, F1-Score = 0.605
loaded_language: Precision = 0.727, Recall = 0.205, F1-Score = 0.320
name_calling,labeling: Precision = 0.714, Recall = 0.147, F1-Score = 0.244
repetition: Precision = 0.750, Recall = 0.225, F1-Score = 0.346


In [30]:
train_tokens2 = remove_punct_stop(train_tokens1)
val_tokens2 = remove_punct_stop(val_tokens1)

V2 = Vectorizer(train_tokens2)
train_vecs2 = [V2.transform(x) for x in train_tokens2]
val_vecs2 = [V2.transform(x) for x in val_tokens2]

In [32]:
classifier2 = MultinomialNB()
classifier2.fit(train_vecs2, train_snip_labs)
preds2 = classifier2.predict(val_vecs2)

print(get_snip_metrics(preds2, val_snip_labs))
show_class_metrics(get_class_metrics(preds2, val_snip_labs))

(0.3948220064724919, 0.38266357774104676, 0.4621960002716582, 0.39116199103374205)
appeal_to_fear_prejudice: Precision = 0.396, Recall = 0.442, F1-Score = 0.418
causal_oversimplification: Precision = 0.292, Recall = 0.543, F1-Score = 0.380
doubt: Precision = 0.351, Recall = 0.465, F1-Score = 0.400
exaggeration,minimisation: Precision = 0.250, Recall = 0.467, F1-Score = 0.326
flag_waving: Precision = 0.595, Recall = 0.556, F1-Score = 0.575
loaded_language: Precision = 0.643, Recall = 0.231, F1-Score = 0.340
name_calling,labeling: Precision = 0.545, Recall = 0.176, F1-Score = 0.267
repetition: Precision = 0.625, Recall = 0.250, F1-Score = 0.357


In [35]:
TV = TFIDF_calc(train_tokens2)
train_vecs3 = [TV.transform(x) for x in train_tokens2]
val_vecs3 = [TV.transform(x) for x in val_tokens2]

In [36]:
classifier3 = MultinomialNB()
classifier3.fit(train_vecs3, train_snip_labs)
preds3 = classifier3.predict(val_vecs3)

print(get_snip_metrics(preds3, val_snip_labs))
show_class_metrics(get_class_metrics(preds3, val_snip_labs))

(0.43042071197411, 0.4260590245568021, 0.47677339591921564, 0.4306537753741926)
appeal_to_fear_prejudice: Precision = 0.447, Recall = 0.395, F1-Score = 0.420
causal_oversimplification: Precision = 0.278, Recall = 0.429, F1-Score = 0.337
doubt: Precision = 0.442, Recall = 0.442, F1-Score = 0.442
exaggeration,minimisation: Precision = 0.279, Recall = 0.633, F1-Score = 0.388
flag_waving: Precision = 0.574, Recall = 0.600, F1-Score = 0.587
loaded_language: Precision = 0.556, Recall = 0.256, F1-Score = 0.351
name_calling,labeling: Precision = 0.529, Recall = 0.265, F1-Score = 0.353
repetition: Precision = 0.708, Recall = 0.425, F1-Score = 0.531


#BERT for task 2

In [11]:
bert_v = "bert-base-uncased"

tokenizer = BertTokenizerFast.from_pretrained(bert_v, do_lower_case=True)

snip_bert = BertForSequenceClassification.from_pretrained(bert_v, num_labels=8).to("cuda")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
longest = -1
for i, s in enumerate(train_snips):
  if len(s.split()) > longest:
    longest = len(s.split())

print(longest)

NameError: name 'train_snips' is not defined

In [17]:
train_snip_tokens = tokenizer(train_snips, truncation=True, padding=True, max_length=220)
val_snip_tokens = tokenizer(val_snips, truncation=True, padding=True, max_length=220)

In [18]:
train_dataset = Dataset(train_snip_tokens, train_snip_labs)
val_dataset = Dataset(val_snip_tokens, val_snip_labs)

In [19]:
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=10,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    weight_decay=0.015,
    logging_dir='./logs',
    load_best_model_at_end=True,
    logging_steps=80,
    save_steps=80,
    evaluation_strategy="steps",
)



In [22]:
trainer = Trainer(
    model=snip_bert,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

In [31]:
#LOGS FOR 32 BATCH SIZE, 5 EPOCHS
trainer.train()

Step,Training Loss,Validation Loss,Accuracy
60,1.6821,1.396373,0.488673
120,0.9829,1.115628,0.618123
180,0.5248,1.084315,0.621359


TrainOutput(global_step=205, training_loss=0.9812154909459556, metrics={'train_runtime': 216.2059, 'train_samples_per_second': 29.856, 'train_steps_per_second': 0.948, 'total_flos': 577215554870880.0, 'train_loss': 0.9812154909459556, 'epoch': 5.0})

In [36]:
#OUTPUTS FOR 32 BATCH SIZE, 5 EPOCHS
outputs = trainer.predict(val_dataset)
preds_snipbert = [pred.argmax(-1) for pred in outputs.predictions]
print(get_snip_metrics(preds_snipbert, val_snip_labs))
show_class_metrics(get_class_metrics(preds_snipbert, val_snip_labs))

(0.6213592233009708, 0.6166753567891783, 0.6245528664951282, 0.6201222274837186)
appeal_to_fear_prejudice: Precision = 0.647, Recall = 0.512, F1-Score = 0.571
causal_oversimplification: Precision = 0.700, Recall = 0.800, F1-Score = 0.747
doubt: Precision = 0.711, Recall = 0.628, F1-Score = 0.667
exaggeration,minimisation: Precision = 0.475, Recall = 0.633, F1-Score = 0.543
flag_waving: Precision = 0.729, Recall = 0.778, F1-Score = 0.753
loaded_language: Precision = 0.590, Recall = 0.590, F1-Score = 0.590
name_calling,labeling: Precision = 0.667, Recall = 0.471, F1-Score = 0.552
repetition: Precision = 0.478, Recall = 0.550, F1-Score = 0.512


In [45]:
#LOGS 16 BATCH SIZE, 5 EPOCHS
trainer.train()

Step,Training Loss,Validation Loss,Accuracy
80,0.1166,1.734236,0.61165
160,0.1234,1.966767,0.627832
240,0.117,2.00812,0.608414
320,0.0588,1.947041,0.63754
400,0.0481,1.994384,0.627832


TrainOutput(global_step=405, training_loss=0.09169212289062548, metrics={'train_runtime': 304.8528, 'train_samples_per_second': 21.174, 'train_steps_per_second': 1.329, 'total_flos': 577215554870880.0, 'train_loss': 0.09169212289062548, 'epoch': 5.0})

In [46]:
#OUTPUTS 16 BATCH SIZE, 5 EPOCHS
outputs = trainer.predict(val_dataset)
preds_snipbert = [pred.argmax(-1) for pred in outputs.predictions]
print(get_snip_metrics(preds_snipbert, val_snip_labs))
show_class_metrics(get_class_metrics(preds_snipbert, val_snip_labs))

(0.6116504854368932, 0.6114387267396625, 0.6469628052764845, 0.6087777470857156)
appeal_to_fear_prejudice: Precision = 0.556, Recall = 0.698, F1-Score = 0.619
causal_oversimplification: Precision = 0.571, Recall = 0.800, F1-Score = 0.667
doubt: Precision = 0.778, Recall = 0.488, F1-Score = 0.600
exaggeration,minimisation: Precision = 0.591, Recall = 0.433, F1-Score = 0.500
flag_waving: Precision = 0.875, Recall = 0.622, F1-Score = 0.727
loaded_language: Precision = 0.500, Recall = 0.615, F1-Score = 0.552
name_calling,labeling: Precision = 0.833, Recall = 0.588, F1-Score = 0.690
repetition: Precision = 0.472, Recall = 0.625, F1-Score = 0.538


In [23]:
%cd /content
#LOGS 32 BATCH SIZE, 10 EPOCHS, WEIGHT DECAY 0.015
trainer.train()

/content


Step,Training Loss,Validation Loss,Accuracy
80,1.6563,1.340312,0.517799
160,0.7323,1.130711,0.614887
240,0.244,1.271079,0.631068
320,0.0994,1.400231,0.640777
400,0.058,1.462992,0.640777


TrainOutput(global_step=410, training_loss=0.5464059454638783, metrics={'train_runtime': 458.9291, 'train_samples_per_second': 28.131, 'train_steps_per_second': 0.893, 'total_flos': 1154431109741760.0, 'train_loss': 0.5464059454638783, 'epoch': 10.0})

In [24]:
#OUTPUTS 32 BATCH SIZE, 10 EPOCHS, WEIGHT DECAY 0.015
outputs = trainer.predict(val_dataset)
preds_snipbert = [pred.argmax(-1) for pred in outputs.predictions]
print(get_snip_metrics(preds_snipbert, val_snip_labs))
show_class_metrics(get_class_metrics(preds_snipbert, val_snip_labs))

(0.6148867313915858, 0.6091729551142661, 0.6176686207589306, 0.6178303812580488)
appeal_to_fear_prejudice: Precision = 0.571, Recall = 0.465, F1-Score = 0.513
causal_oversimplification: Precision = 0.489, Recall = 0.657, F1-Score = 0.561
doubt: Precision = 0.644, Recall = 0.674, F1-Score = 0.659
exaggeration,minimisation: Precision = 0.545, Recall = 0.600, F1-Score = 0.571
flag_waving: Precision = 0.717, Recall = 0.733, F1-Score = 0.725
loaded_language: Precision = 0.629, Recall = 0.564, F1-Score = 0.595
name_calling,labeling: Precision = 0.636, Recall = 0.824, F1-Score = 0.718
repetition: Precision = 0.708, Recall = 0.425, F1-Score = 0.531


In [25]:
drop_bert = BertForSequenceClassification.from_pretrained(bert_v, num_labels=8, hidden_dropout_prob=0.25, attention_probs_dropout_prob=0.25, classifier_dropout=0.5).to("cuda")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [26]:
drop_trainer = Trainer(
    model=drop_bert,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

In [27]:
#LOGS WITH DROPOUT 0.25 0.25 0.5
drop_trainer.train()

Step,Training Loss,Validation Loss,Accuracy
80,1.9009,1.676046,0.330097
160,1.4971,1.309475,0.508091
240,1.041,1.167139,0.566343
320,0.7576,1.145074,0.618123
400,0.5858,1.158513,0.618123


TrainOutput(global_step=410, training_loss=1.1431919586367723, metrics={'train_runtime': 437.2305, 'train_samples_per_second': 29.527, 'train_steps_per_second': 0.938, 'total_flos': 1154431109741760.0, 'train_loss': 1.1431919586367723, 'epoch': 10.0})

In [28]:
#OUTPUTS DROPOUT 0.25 0.25 0.5
outputs = drop_trainer.predict(val_dataset)
preds_snipbert = [pred.argmax(-1) for pred in outputs.predictions]
print(get_snip_metrics(preds_snipbert, val_snip_labs))
show_class_metrics(get_class_metrics(preds_snipbert, val_snip_labs))

(0.6181229773462783, 0.6149535214950682, 0.6280272215158536, 0.6148115194199326)
appeal_to_fear_prejudice: Precision = 0.592, Recall = 0.674, F1-Score = 0.630
causal_oversimplification: Precision = 0.512, Recall = 0.629, F1-Score = 0.564
doubt: Precision = 0.697, Recall = 0.535, F1-Score = 0.605
exaggeration,minimisation: Precision = 0.528, Recall = 0.633, F1-Score = 0.576
flag_waving: Precision = 0.750, Recall = 0.800, F1-Score = 0.774
loaded_language: Precision = 0.583, Recall = 0.538, F1-Score = 0.560
name_calling,labeling: Precision = 0.826, Recall = 0.559, F1-Score = 0.667
repetition: Precision = 0.537, Recall = 0.550, F1-Score = 0.543
