In [1]:
!python -m pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 4.9 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 53.6 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 58.5 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [None]:
# !python -m pip install torchmetrics

In [None]:
# import torchmetrics

In [1]:
import transformers

In [2]:
# import torchmetrics

In [3]:
import traceback
import csv

import pandas as pd


def write_tsv_dataframe(filepath, dataframe):
    """
        Stores `DataFrame` as tsv file

        Parameters
        ----------
        filepath : str
            Path to tsv file
        dataframe : pd.DataFrame
            DataFrame to store

        Raises
        ------
        IOError
            if the file can't be opened
    """
    try:
        dataframe.to_csv(filepath, encoding='utf-8', sep='\t', index=False, header=True, quoting=csv.QUOTE_NONE)
    except IOError:
        traceback.print_exc()


In [4]:
def combine_columns(df_arguments, df_labels):
    """Combines the two `DataFrames` on column `Argument ID`"""
    return pd.merge(df_arguments, df_labels, on='Argument ID')


In [5]:
def split_arguments(df_arguments):
    """Splits `DataFrame` by column `Usage` into `train`-, `validation`-, and `test`-arguments"""
    train_arguments = df_arguments.loc[df_arguments['Usage'] == 'train'].drop(['Usage'], axis=1).reset_index(drop=True)
    valid_arguments = df_arguments.loc[df_arguments['Usage'] == 'validation'].drop(['Usage'], axis=1).reset_index(drop=True)
    test_arguments = df_arguments.loc[df_arguments['Usage'] == 'test'].drop(['Usage'], axis=1).reset_index(drop=True)
    
    return train_arguments, valid_arguments, test_arguments


In [6]:
def create_dataframe_head(argument_ids, model_name):
    """
        Creates `DataFrame` usable to append predictions to it

        Parameters
        ----------
        argument_ids : list[str]
            First column of the resulting DataFrame
        model_name : str
            Second column of DataFrame will contain the given model name

        Returns
        -------
        pd.DataFrame
            prepared DataFrame
    """
    df_model_head = pd.DataFrame(argument_ids, columns=['Argument ID'])
    df_model_head['Method'] = [model_name] * len(argument_ids)

    return df_model_head


In [7]:
import json
class MissingColumnError(AttributeError):
    """Error indicating that an imported DataFrame lacks necessary columns"""
    pass


In [8]:
def load_json_file(filepath):
    """Load content of json-file from `filepath`"""
    with open(filepath, 'r') as  json_file:
        return json.load(json_file)


In [9]:
def load_values_from_json(filepath):
    """Load values per level from json-file from `filepath`"""
    json_values = load_json_file(filepath)
    values = { "1":set(), "2":set(), "3":set(), "4a":set(), "4b":set() }
    for value in json_values["values"]:
        values["1"].add(value["name"])
        values["2"].add(value["level2"])
        for valueLevel3 in value["level3"]:
            values["3"].add(valueLevel3)
        for valueLevel4a in value["level4a"]:
            values["4a"].add(valueLevel4a)
        for valueLevel4b in value["level4b"]:
            values["4b"].add(valueLevel4b)
    values["1"] = sorted(values["1"])
    values["2"] = sorted(values["2"])
    values["3"] = sorted(values["3"])
    values["4a"] = sorted(values["4a"])
    values["4b"] = sorted(values["4b"])
    return values


In [10]:
def load_arguments_from_tsv(filepath, default_usage='test'):
    """
        Reads arguments from tsv file

        Parameters
        ----------
        filepath : str
            The path to the tsv file
        default_usage : str, optional
            The default value if the column "Usage" is missing

        Returns
        -------
        pd.DataFrame
            the DataFrame with all arguments

        Raises
        ------
        MissingColumnError
            if the required columns "Argument ID" or "Premise" are missing in the read data
        IOError
            if the file can't be read
        """
    try:
        dataframe = pd.read_csv(filepath, encoding='utf-8', sep='\t', header=0)
        if not {'Argument ID', 'Premise'}.issubset(set(dataframe.columns.values)):
            raise MissingColumnError('The argument "%s" file does not contain the minimum required columns [Argument ID, Premise].' % filepath)
        if 'Usage' not in dataframe.columns.values:
            dataframe['Usage'] = [default_usage] * len(dataframe)
        return dataframe
    except IOError:
        traceback.print_exc()
        raise


In [11]:
def load_labels_from_tsv(filepath, label_order):
    """
        Reads label annotations from tsv file

        Parameters
        ----------
        filepath : str
            The path to the tsv file
        label_order : list[str]
            The listing and order of the labels to use from the read data

        Returns
        -------
        pd.DataFrame
            the DataFrame with the annotations

        Raises
        ------
        MissingColumnError
            if the required columns "Argument ID" or names from `label_order` are missing in the read data
        IOError
            if the file can't be read
        """
    try:
        dataframe = pd.read_csv(filepath, encoding='utf-8', sep='\t', header=0)
        dataframe = dataframe[['Argument ID'] + label_order]
        return dataframe
    except IOError:
        traceback.print_exc()
        raise
    except KeyError:
        raise MissingColumnError('The file "%s" does not contain the required columns for its level.' % filepath)


In [12]:
import sys
import getopt
import os

In [13]:
model_dir = 'models'
data_dir = 'data'

In [14]:
if not os.path.exists(model_dir):
    os.makedirs(model_dir)


In [15]:
argument_filepath = os.path.join(data_dir, 'arguments.tsv')
value_json_filepath = os.path.join(data_dir, 'values.json')


In [16]:
df_arguments = load_arguments_from_tsv(argument_filepath, default_usage='train')

In [17]:
values = load_values_from_json(value_json_filepath)
num_labels_Lv2 = len(values['2'])


In [18]:
df_arguments.keys()

Index(['Argument ID', 'Part', 'Usage', 'Conclusion', 'Stance', 'Premise'], dtype='object')

In [19]:
# for ip in df_arguments['Argument ID']:
#   #print(df_arguments['Stance'][ip])
  
#   print(ip)


In [20]:
level =2
label_filepath = os.path.join(data_dir, 'labels-level{}.tsv'.format(str(level)))
df_labels = load_labels_from_tsv(label_filepath, values[str(level)])

In [21]:
a = df_labels.keys()
for key in df_labels.keys():
  print(len(df_labels[key]),key)

5270 Argument ID
5270 Achievement
5270 Benevolence: caring
5270 Benevolence: dependability
5270 Conformity: interpersonal
5270 Conformity: rules
5270 Face
5270 Hedonism
5270 Humility
5270 Power: dominance
5270 Power: resources
5270 Security: personal
5270 Security: societal
5270 Self-direction: action
5270 Self-direction: thought
5270 Stimulation
5270 Tradition
5270 Universalism: concern
5270 Universalism: nature
5270 Universalism: objectivity
5270 Universalism: tolerance


In [22]:

df_labels['Achievement'][0]

0

In [23]:
from typing import Dict, List

In [24]:
def generate_pairwise_input(dataset, labels, datatype):
    """
    TODO: group all premises and corresponding hypotheses and labels of the datapoints
    a datapoint as seen earlier is a dict of premis, hypothesis and label
    """
    #raise NotImplementedError
    premise=[]
    conclusion=[]
    stance=[]
    n_labels =labels.keys()
    n_labels = n_labels[1:]
    print(n_labels)
    label=[]
    
    n = len(dataset['Argument ID'])
    m = len(labels['Argument ID'])
    arguments = []
    print(n,m)
    for i in range(n):
        if dataset['Usage'][i]==datatype:
          premise.append(dataset['Premise'][i])
          conclusion.append(dataset['Conclusion'][i])
          stance.append(dataset['Stance'][i])
          arguments.append(dataset['Argument ID'][i])
    for i in range(m):
        if (labels['Argument ID'][i] in arguments):
          sent_label = []
          #print(i)
          for l in range(len(n_labels)):
              #print(n_labels[l])
              sent_label.append(int(labels[n_labels[l]][i]))
          label.append(sent_label)

    return premise, conclusion, stance, label

In [25]:
train_premises, train_conclusion, train_stance, train_labels = generate_pairwise_input(df_arguments, df_labels, 'train')
val_premises, val_conclusion, val_stance, val_labels = generate_pairwise_input(df_arguments, df_labels, 'validation')
test_premises, test_conclusion, test_stance, test_labels = generate_pairwise_input(df_arguments, df_labels, 'test')

Index(['Achievement', 'Benevolence: caring', 'Benevolence: dependability',
       'Conformity: interpersonal', 'Conformity: rules', 'Face', 'Hedonism',
       'Humility', 'Power: dominance', 'Power: resources',
       'Security: personal', 'Security: societal', 'Self-direction: action',
       'Self-direction: thought', 'Stimulation', 'Tradition',
       'Universalism: concern', 'Universalism: nature',
       'Universalism: objectivity', 'Universalism: tolerance'],
      dtype='object')
5270 5270
Index(['Achievement', 'Benevolence: caring', 'Benevolence: dependability',
       'Conformity: interpersonal', 'Conformity: rules', 'Face', 'Hedonism',
       'Humility', 'Power: dominance', 'Power: resources',
       'Security: personal', 'Security: societal', 'Self-direction: action',
       'Self-direction: thought', 'Stimulation', 'Tradition',
       'Universalism: concern', 'Universalism: nature',
       'Universalism: objectivity', 'Universalism: tolerance'],
      dtype='object')
5270 5

In [26]:
#Randomize them first
# train_premises, train_conclusion, train_stance, train_labels = generate_pairwise_input(df_arguments, df_labels)


In [27]:
# import random 
# random.seed(42)
# def randomize_data(premises, conclusion, stance, labels):
#   n = len(premises)
#   data = list(range(n))
#   random.shuffle(data)
#   train_premises = []
#   train_conclusion = []
#   train_stance = []
#   train_labels = []
#   for i in data:
#     train_premises.append(premises[i])
#     train_conclusion.append(conclusion[i])
#     train_stance.append(stance[i])
#     train_labels.append(labels[:][i])
#   return train_premises, train_conclusion, train_stance, train_labels


In [28]:
# train_premises, train_conclusion, train_stance, train_labels = randomize_data(train_premises, train_conclusion, train_stance, train_labels)

In [29]:
# val_premises = train_premises[-500:]
# val_conclusion = train_conclusion[-500:]
# val_stance = train_stance[-500:]
# val_labels = train_labels[:][-500:]

In [30]:
# train_premises = train_premises[:-500]
# train_conclusion = train_conclusion[:-500]
# train_stance = train_stance[:-500]
# train_labels = train_labels[:][:-500]

In [31]:
# Nothing to do for this class!
import torch
from transformers import BertModel
from transformers import AutoTokenizer
from typing import Dict, List

class BatchTokenizer:
    """Tokenizes and pads a batch of input sentences."""

    def __init__(self):
        """Initializes the tokenizer

        Args:
            pad_symbol (Optional[str], optional): The symbol for a pad. Defaults to "<P>".
        """
        self.hf_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    
    def get_sep_token(self,):
        return self.hf_tokenizer.sep_token
    
    def __call__(self, prem_batch: List[str], hyp_batch: List[str], stance_batch: List[str]) -> List[List[str]]:
        """Uses the huggingface tokenizer to tokenize and pad a batch.

        We return a dictionary of tensors per the huggingface model specification.

        Args:
            batch (List[str]): A List of sentence strings

        Returns:
            Dict: The dictionary of token specifications provided by HuggingFace
        """
        # The HF tokenizer will PAD for us, and additionally combine 
        # The two sentences deimited by the [SEP] token.
        batch_len = len(prem_batch)
        #spaces = [" "]*batch_len
        conc_batch = [stance_batch[i]+" "+hyp_batch[i] for i in range(batch_len)]
        enc = self.hf_tokenizer(
            prem_batch,
            conc_batch,
            padding=True,
            return_token_type_ids=False,
            return_tensors='pt'
        )

        return enc
    

# HERE IS AN EXAMPLE OF HOW TO USE THE BATCH TOKENIZER
tokenizer = BatchTokenizer()
a = [["this is the premise.", "This is also a premise"], ["this is the hypothesis", "This is a second hypothesis"],["in favour of", "against"]]
x = tokenizer(*a)
print(x)
tokenizer.hf_tokenizer.batch_decode(x["input_ids"])



{'input_ids': tensor([[  101,  2023,  2003,  1996, 18458,  1012,   102,  1999,  7927,  1997,
          2023,  2003,  1996, 10744,   102],
        [  101,  2023,  2003,  2036,  1037, 18458,   102,  2114,  2023,  2003,
          1037,  2117, 10744,   102,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]])}


['[CLS] this is the premise. [SEP] in favour of this is the hypothesis [SEP]',
 '[CLS] this is also a premise [SEP] against this is a second hypothesis [SEP] [PAD]']

In [32]:
def chunk(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[:][i:i + n]

def chunk_multi(lst1, lst2, lst3, n):
    for i in range(0, len(lst1), n):
        yield lst1[i: i + n], lst2[i: i + n], lst3[i: i + n]
        


In [33]:
sum=0
import numpy as np
# for i in range(5270):
#   sum += np.sum(np.array(train_labels[:][i]))
print(np.sum(np.array(train_labels)))
#print(sum)

14675


In [34]:
# against=0
# infavour = 0
# for i in range(4770):
#   if(train_stance[i]=='against'):
#     against +=1
#   elif(train_stance[i]=='in favor of'):
#     infavour += 1
#   else:
#     print(train_stance[i])


In [35]:
# Notice that since we use huggingface, we tokenize and
# encode in all at once!
batch_size=64
tokenizer = BatchTokenizer()
train_input_batches = [b for b in chunk_multi(train_premises, train_conclusion, train_stance, batch_size)]
# Tokenize + encode
train_input_batches = [tokenizer(*batch) for batch in train_input_batches]

In [36]:
val_input_batches = [b for b in chunk_multi(val_premises, val_conclusion, val_stance, batch_size)]
# Tokenize + encode
val_input_batches = [tokenizer(*batch) for batch in val_input_batches]


In [37]:
len(val_labels)

277

In [38]:
label_ids = ['Achievement', 'Benevolence: caring', 'Benevolence: dependability',
       'Conformity: interpersonal', 'Conformity: rules', 'Face', 'Hedonism',
       'Humility', 'Power: dominance', 'Power: resources',
       'Security: personal', 'Security: societal', 'Self-direction: action',
       'Self-direction: thought', 'Stimulation', 'Tradition',
       'Universalism: concern', 'Universalism: nature',
       'Universalism: objectivity', 'Universalism: tolerance']
l_ids = []
for l in label_ids:
    a =[l]
    l_ids.append(a)


In [39]:
# from transformers import BertTokenizer, BertModel
# import torch
# import numpy as np
# embeddings = []
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# model = BertModel.from_pretrained('bert-base-uncased')
# for i in range(20):
#     input_ids = tokenizer(l_ids[i], return_tensors="pt")
#     output = model(**input_ids)
#     final_layer = output.last_hidden_state[:,0,:]
#     #print(final_layer.shape)
#     #hidden_shape = final_layer.shape
#     #embed = torch.reshape(final_layer,(1,hidden_shape[1]) )
#     embed = final_layer[0]
#     embeddings.append(embed.detach().numpy())
# embeddings = np.array(embeddings)
# embeddings.shape

In [40]:
# from nltk.cluster import KMeansClusterer
# import nltk

# def clustering_question(data,NUM_CLUSTERS = 5):
#     kclusterer = KMeansClusterer(
#         NUM_CLUSTERS, distance=nltk.cluster.util.cosine_distance,
#         repeats=1000,avoid_empty_clusters=True)

#     assigned_clusters = kclusterer.cluster(data, assign_clusters=True)

#     return assigned_clusters

In [41]:
# clusters = clustering_question(embeddings)
# clusters

In [42]:
# cluster_labels =[[],[],[],[],[]]
# cluster_ids = [[],[],[],[],[]]
# for i in range(20):
#     cluster_labels[clusters[i]].append(label_ids[i])
#     cluster_ids[clusters[i]].append(i)
# cluster_ids, cluster_labels

In [43]:
for i in range(20):
  print(i, label_ids[i])
print(np.nonzero(train_labels[0]), train_premises[0], train_labels[0])

0 Achievement
1 Benevolence: caring
2 Benevolence: dependability
3 Conformity: interpersonal
4 Conformity: rules
5 Face
6 Hedonism
7 Humility
8 Power: dominance
9 Power: resources
10 Security: personal
11 Security: societal
12 Self-direction: action
13 Self-direction: thought
14 Stimulation
15 Tradition
16 Universalism: concern
17 Universalism: nature
18 Universalism: objectivity
19 Universalism: tolerance
(array([11], dtype=int64),) if entrapment can serve to more easily capture wanted criminals, then why shouldn't it be legal? [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]


In [44]:
# 0 Self-direction: thought
# 1 Self-direction: action	
# 2 Stimulation	
# 3 Hedonism	
# 4 Achievement	
# 5 Power: dominance	
# 6 Power: resources	
# 7 Face	
# 8 Security: personal	
# 9 Security: societal	
# 10 Tradition	
# 11 Conformity: rules	
# 12 Conformity: interpersonal	
# 13 Humility	
# 14 Benevolence: caring	
# 15 Benevolence: dependability	
# 16 Universalism: concern	
# 17 Universalism: nature	
# 18 Universalism: tolerance	
# 19 Universalism: objectivity

In [45]:
# # Achievement, Face, Power: dominance, Power: resources [4, 7, 5, 6]

# #Benevolence: caring, Benevolence: dependability, Humility, Universalism: concern [14, 15, 13, 16]

# # Stimulation, Tradition, Self-direction: action, Self-direction: thought [2, 10, 1, 0]

# # Conformity: interpersonal, Conformity: rules, Security: personal, Security: societal [12, 11, 8, 9]

# # Hedonism, Universalism: nature, Universalism: objectivity, Universalism: tolerance [3, 17,18, 19]
# # Note: This is just one possible way to group these elements. There may be other valid ways to do so.


# clusters = [
#     [4, 7, 5, 6],
#     [14, 15, 13, 16],
#     [2, 10, 1, 0],
#     [12, 11, 8, 9],
#     [3, 17,18, 19]
# ]


In [46]:
# # Achievement, Face, Power: dominance, Power: resources [0, 5, 8, 9]


# #Benevolence: caring, Benevolence: dependability, Humility, Universalism: concern [1,2,7, 16]


# # Stimulation, Tradition, Self-direction: action, Self-direction: thought [14, 15, 12, 13]


# # Conformity: interpersonal, Conformity: rules, Security: personal, Security: societal [3, 4, 10, 11]


# # Hedonism, Universalism: nature, Universalism: objectivity, Universalism: tolerance [6, 17,18, 19]
# # Note: This is just one possible way to group these elements. There may be other valid ways to do so.


# clusters = [
#     [0, 5, 8, 9],
#     [1,2,7, 16],
#     [14, 15, 12, 13],
#     [3, 4, 10, 11],
#     [6, 17,18, 19]
# ]


In [47]:
def encode_labels(labels: List[List[int]]) -> torch.FloatTensor:
    """Turns the batch of labels into a tensor

    Args:
        labels (List[List[int]]): List of all labels in the batch

    Returns:
        torch.FloatTensor: Tensor of all labels in the batch
    """
    
    return torch.LongTensor(labels)


In [48]:
# check_labels = encode_labels(train_labels[0:10])
# np.nonzero(train_labels[9]), np.nonzero(check_labels[9])

In [49]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [50]:
train_label_batches = [b for b in chunk(train_labels, batch_size)]
train_label_batches = [encode_labels(batch) for batch in train_label_batches]

In [51]:
val_label_batches = [b for b in chunk(val_labels, batch_size)]
val_label_batches = [encode_labels(batch) for batch in val_label_batches]

In [52]:
val_label_batches[0][0]

tensor([1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1])

In [53]:
class LlabelClassifier(torch.nn.Module):
    def __init__(self, output_size: int, hidden_size: int):
        super().__init__()
        self.output_size = output_size
        self.hidden_size = hidden_size
        # Initialize BERT, which we use instead of a single embedding layer.
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        # TODO [OPTIONAL]: Updating all BERT parameters can be slow and memory intensive. 
        # Freeze them if training is too slow. Notice that the learning
        # rate should probably be smaller in this case.
        # Uncommenting out the below 2 lines means only our classification layer will be updated.
        for param in self.bert.parameters():
            param.requires_grad = False
        self.bert_hidden_dimension = self.bert.config.hidden_size
        print(self.bert_hidden_dimension)
        # TODO: Add an extra hidden layer in the classifier, projecting
        #      from the BERT hidden dimension to hidden size.
        # TODO: Add a relu nonlinearity to be used in the forward method
        #      https://pytorch.org/docs/stable/generated/torch.nn.ReLU.html
        self.hidden_layer1 = torch.nn.Linear(self.bert_hidden_dimension, 512)
        self.hidden_layer2 = torch.nn.Linear(512, 64)
        self.relu = torch.nn.ReLU()
        self.classifier = torch.nn.Linear(64, 20)
        
        #self.classifier = torch.nn.Linear(self.hidden_size, self.output_size)
        self.log_softmax = torch.nn.LogSoftmax(dim=2)

    def encode_text(
        self,
        symbols: Dict
    ) -> torch.Tensor:
        """Encode the (batch of) sequence(s) of token symbols with an LSTM.
            Then, get the last (non-padded) hidden state for each symbol and return that.

        Args:
            symbols (Dict): The Dict of token specifications provided by the HuggingFace tokenizer

        Returns:
            torch.Tensor: The final hiddens tate of the LSTM, which represents an encoding of
                the entire sentence
        """
        # First we get the contextualized embedding for each input symbol
        # We no longer need an LSTM, since BERT encodes context and 
        # gives us a single vector describing the sequence in the form of the [CLS] token.
        embedded = self.bert(**symbols)
        #print(embedded)
        #print("Embedded", embedded.pooler_output.shape, embedded.last_hidden_state.shape)
        # TODO: Get the [CLS] token using the `pooler_output` from 
        #      The BertModel output. See here: https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertModel
        #      and check the returns for the forward method.
        # We want to return a tensor of the form batch_size x 1 x bert_hidden_dimension
        #raise NotImplementedError
        
        #pool_output_shape = embedded.pooler_output.shape
        #return torch.reshape(embedded.pooler_output,(pool_output_shape[0],1,pool_output_shape[1]) )
        last_hidden_state = embedded.last_hidden_state[:,0,:]
        hidden_shape = last_hidden_state.shape
        return torch.reshape(last_hidden_state,(hidden_shape[0],1,hidden_shape[1]) )

    def forward(
        self,
        symbols: Dict,
    ) -> torch.Tensor:
        """_summary_

        Args:
            symbols (Dict): The Dict of token specifications provided by the HuggingFace tokenizer

        Returns:
            torch.Tensor: _description_
        """
        encoded_sents = self.encode_text(symbols)
        #output = self.hidden_layer1(encoded_sents)
        #output = self.relu(output)
        #outputs = [self.hidden_layers[i](encoded_sents) for i in range(self.output_size)]
        #outputs = [self.relu(outputs[i].to(device)) for i in range(self.output_size)]
        #outputs = [self.classifiers[i](outputs[i].to(device)) for i in range(self.output_size)]
        # outputs = []
        # for i in range(self.output_size):
        #     output = self.hidden_layers[i](encoded_sents)
        #     output = self.relu(output)
        #     output = self.classifiers[i](output)
        #     output = torch.nn.Sigmoid()(output)
        #     outputs.append(output)
        output = self.hidden_layer1(encoded_sents)
        output = self.relu(output)
        output = self.hidden_layer2(output)
        output = self.relu(output)
        output = self.classifier(output)
        #output = self.log_softmax(output)
        #output = torch.nn.Sigmoid(output)

        return output

In [54]:
def predict(model: torch.nn.Module, sents: torch.Tensor) -> List:
    sents = sents.to(device)
    logits = model(sents)
    res = []
    logitslen = len(logits)
    #print(logits[0].shape)
    for i in range(logitslen):
        datares = []
        for j in range(20):
            datares.append(logits[i][0][j] > 0.5)
        res.append(datares)
    return res


In [55]:
def f1Score_multiLabel(preds, labels):
    nLabels = 20
    relevants = [0]*20
    positives = [0]*20
    truePositives = [0]*20
    correct = [0]*20
    for i in range(len(preds)):
        for j in range(nLabels):
            if(preds[i][j]==1):
                positives[j] += 1
                if(labels[i][j]==1):
                    truePositives[j] += 1
    for i in range(len(preds)):
        for j in range(nLabels):
            if(preds[i][j]==labels[i][j]):
                correct[j] += 1
    
    for i in range(len(labels)):
        for j in range(nLabels):
            if(labels[i][j]==1):
                relevants[j] += 1
    
    precisions = []*nLabels
    recalls = []*nLabels
    f1Scores = []*nLabels
    accuracies = []*nLabels
    
    precision =0
    recall = 0
    f1 = 0
    #print(truePositives, positives, relevants)
    for i in range(nLabels):
        if(positives[i]>0):
            precision = truePositives[i]/positives[i]
        precisions.append(precision)
        if(relevants[i]>0):
            recall = truePositives[i]/relevants[i]
        recalls.append(recall)
        #print(precision,recall,i)
        if(precision>0 and recall>0):
            f1 = 2 * precision * recall / (precision + recall)
        f1Scores.append(f1)
        accuracies.append(correct[i]/len(preds))
    precision_mean = np.mean(precisions)
    recall_mean = np.mean(recalls)
    f1_mean = np.mean(f1Scores)
    accuracy = np.mean(accuracies)
    return f1_mean, precision_mean, recall_mean, accuracy, f1Scores, precisions, recalls, accuracies
    


In [56]:
import random
from tqdm import tqdm_notebook as tqdm
def training_loop(
    num_epochs,
    train_features,
    train_labels,
    dev_sents,
    dev_labels,
    optimizer,
    scheduler,
    model,
):
    print("Training...")
    all_f1 = []
    all_P = []
    all_R = []
    all_L = []
    all_CELoss = []
    all_HMLoss = []
    all_acc = []
    loss_func = torch.nn.CrossEntropyLoss()
    batches = list(zip(train_features, train_labels))
    random.shuffle(batches)
    for i in range(num_epochs):
        losses = []
        #CELosses = []
        #HMLosses = []
        for features, labels in tqdm(batches):
            # Empty the dynamic computation graph
            features = features.to(device)
            labels = labels.float()
            labels = labels.to(device)
            optimizer.zero_grad()
            preds = model(features)
            loss = loss_func(preds.squeeze(1),labels)
            
            
            # Backpropogate the loss through our model
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        
        print(f"epoch {i}, loss: {np.sum(losses)/len(losses)}")
        # Estimate the f1 score for the development set
        print("Evaluating dev...")
        all_preds = []
        all_labels = []
        for sents, labels in tqdm(zip(dev_sents, dev_labels), total=len(dev_sents)):
            sents = sents.to(device)
            pred = predict(model, sents)
            all_preds.extend(pred)
            all_labels.extend(list(labels))
        # #print(range(len(set(train_labels))))

        dev_f1, dev_P, dev_R, dev_acc, dev_all_f1, dev_all_P, dev_all_R, dev_all_acc = f1Score_multiLabel(all_preds, all_labels)
        print(f"Dev F1 {dev_f1},  Dev Precision {dev_P}, Dev Recall {dev_R}, Dev Accuracy {dev_acc}")
        all_f1.append(dev_all_f1)
        all_P.append(dev_all_P)
        all_R.append(dev_all_R)
        all_L.append(losses)
        all_acc.append(dev_all_acc)
        scheduler.step()
        #print(optimizer)
    # Return the trained model
    with open("all_f1_base.csv", 'ab') as abc:
        np.savetxt(abc, 
               all_f1,
               delimiter =", ", 
               fmt ='%s')
    with open("all_P_base.csv", 'ab') as abc:
        np.savetxt(abc, 
               all_P,
               delimiter =", ", 
               fmt ='%s')
    with open("all_R_base.csv", 'ab') as abc:
        np.savetxt(abc, 
               all_R,
               delimiter =", ", 
               fmt ='%s')
    with open("all_acc_base.csv", 'ab') as abc:
        np.savetxt(abc, 
               all_acc,
               delimiter =", ", 
               fmt ='%s')
    with open("all_L_base.csv", 'ab') as abc:
        np.savetxt(abc, 
               all_L,
               delimiter =", ", 
               fmt ='%s')
    return model

In [57]:
from transformers.optimization import get_linear_schedule_with_warmup
epochs = 200
epoch_warmup = 40
# TODO: Find a good learning rate
LR = 1e-4

possible_labels = 20
model = LlabelClassifier(output_size=possible_labels, hidden_size=512)
model = model.to(device)
optimizer = torch.optim.AdamW(model.parameters(), LR)
scheduler = get_linear_schedule_with_warmup(optimizer, epoch_warmup,epochs)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


768


In [57]:
model =training_loop(
    epochs,
    train_input_batches,
    train_label_batches,
    val_input_batches,
    val_label_batches,
    optimizer,
    scheduler,
    model,
)

Training...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for features, labels in tqdm(batches):


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 0, loss: 10.415994537410452
Evaluating dev...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for sents, labels in tqdm(zip(dev_sents, dev_labels), total=len(dev_sents)):


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.0,  Dev Precision 0.0, Dev Recall 0.0, Dev Accuracy 0.8272563176895307


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 1, loss: 10.395622943764302
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.0,  Dev Precision 0.0, Dev Recall 0.0, Dev Accuracy 0.8272563176895307


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 2, loss: 10.32534938783788
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.0,  Dev Precision 0.0, Dev Recall 0.0, Dev Accuracy 0.8272563176895307


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 3, loss: 10.198371773335472
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.0,  Dev Precision 0.0, Dev Recall 0.0, Dev Accuracy 0.8270758122743682


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 4, loss: 10.040344850340887
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.11093333333333333,  Dev Precision 0.07732342007434945, Dev Recall 0.04905660377358491, Dev Accuracy 0.8162454873646208


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 5, loss: 9.877425542518274
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.3186348430918152,  Dev Precision 0.27424554775252996, Dev Recall 0.17595194327731092, Dev Accuracy 0.7727436823104693


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 6, loss: 9.753828226630368
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.37395565298409084,  Dev Precision 0.2417184287371692, Dev Recall 0.23752188375350142, Dev Accuracy 0.7371841155234657


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 7, loss: 9.681531813607288
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.3738286635003057,  Dev Precision 0.23903115988133936, Dev Recall 0.24639880952380952, Dev Accuracy 0.7310469314079422


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 8, loss: 9.627614640477878
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.36828922455211005,  Dev Precision 0.2856057270756012, Dev Recall 0.2780238673139159, Dev Accuracy 0.7324909747292419


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 9, loss: 9.576127322752084
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.40577751868735285,  Dev Precision 0.2913460186874738, Dev Recall 0.2973680218446602, Dev Accuracy 0.731768953068592


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 10, loss: 9.521788006398216
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.40649201454229084,  Dev Precision 0.28234905389709725, Dev Recall 0.3048425899486008, Dev Accuracy 0.7297833935018051


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 11, loss: 9.463850633421941
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.36040717501081687,  Dev Precision 0.39307213780679023, Dev Recall 0.3028705635107465, Dev Accuracy 0.7353790613718412


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 12, loss: 9.402460895367522
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.40566136268231484,  Dev Precision 0.35004540117401906, Dev Recall 0.31064222281974596, Dev Accuracy 0.7427797833935019


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 13, loss: 9.338015513633614
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4321530000422721,  Dev Precision 0.3528601735157807, Dev Recall 0.31505541289745825, Dev Accuracy 0.7550541516245488


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 14, loss: 9.271768975613723
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.38606985057139803,  Dev Precision 0.41281421671048407, Dev Recall 0.3144618972085637, Dev Accuracy 0.7651624548736462


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 15, loss: 9.205612410360308
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.37339362464064835,  Dev Precision 0.38527528414357043, Dev Recall 0.31427156118777344, Dev Accuracy 0.7711191335740073


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 16, loss: 9.139329120294372
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4184149492373894,  Dev Precision 0.4113849550780321, Dev Recall 0.33275760215162103, Dev Accuracy 0.7832129963898917


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 17, loss: 9.071909029092362
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.43578296127413224,  Dev Precision 0.41864897590409295, Dev Recall 0.3456274243773279, Dev Accuracy 0.7904332129963898


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 18, loss: 9.00761352368255
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.42470045901808656,  Dev Precision 0.4721282087010012, Dev Recall 0.3674024327309219, Dev Accuracy 0.7954873646209386


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 19, loss: 8.947146913898525
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.437934030335472,  Dev Precision 0.43046195742005944, Dev Recall 0.3822766695358063, Dev Accuracy 0.7974729241877256


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 20, loss: 8.891614849887677
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.43983777664907137,  Dev Precision 0.4035354121163473, Dev Recall 0.3909722262867589, Dev Accuracy 0.7978339350180506


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 21, loss: 8.840509742053587
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.45887173565467104,  Dev Precision 0.40479320807156344, Dev Recall 0.41899138310293466, Dev Accuracy 0.7956678700361011


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 22, loss: 8.793473015970259
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46250525424431455,  Dev Precision 0.36905238595492157, Dev Recall 0.42838011234840084, Dev Accuracy 0.7945848375451264


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 23, loss: 8.750296393437171
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4688375019981298,  Dev Precision 0.37037906412126864, Dev Recall 0.4353097774706569, Dev Accuracy 0.7936823104693141


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 24, loss: 8.710402047456201
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.47326096734709544,  Dev Precision 0.35308111766605293, Dev Recall 0.4465739956283222, Dev Accuracy 0.7922382671480146


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 25, loss: 8.67342083489717
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.48093008948725346,  Dev Precision 0.35457260890597336, Dev Recall 0.45451262403333903, Dev Accuracy 0.7897111913357401


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 26, loss: 8.639206566027742
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4715988684147412,  Dev Precision 0.3644331832107445, Dev Recall 0.46174296094965905, Dev Accuracy 0.7882671480144404


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 27, loss: 8.607537383463844
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4724047062598828,  Dev Precision 0.3621697875426777, Dev Recall 0.4638628379280266, Dev Accuracy 0.788086642599278


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 28, loss: 8.577629900690335
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.47777804109463806,  Dev Precision 0.36379854514691856, Dev Recall 0.4682108488370204, Dev Accuracy 0.7870036101083032


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 29, loss: 8.549723625183105
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4636217004073644,  Dev Precision 0.3784398941047536, Dev Recall 0.4957296290501604, Dev Accuracy 0.7871841155234657


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 30, loss: 8.523429301247669
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46958969880589646,  Dev Precision 0.3854257611673246, Dev Recall 0.49884898610804596, Dev Accuracy 0.7864620938628158


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 31, loss: 8.498784534966768
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46892937330197687,  Dev Precision 0.382380502651691, Dev Recall 0.500492672610223, Dev Accuracy 0.7853790613718411


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 32, loss: 8.475353162680099
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.47013478738202813,  Dev Precision 0.3820752260324958, Dev Recall 0.5047109307670096, Dev Accuracy 0.7853790613718412


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 33, loss: 8.453091856259018
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4699549204188528,  Dev Precision 0.3682679409204988, Dev Recall 0.5042254938738057, Dev Accuracy 0.7851985559566786


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 34, loss: 8.431770417227673
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.482626041333088,  Dev Precision 0.34603074416779284, Dev Recall 0.48141263092374764, Dev Accuracy 0.7864620938628158


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 35, loss: 8.411420032159606
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.48372629452536026,  Dev Precision 0.34581908482376533, Dev Recall 0.48445505978553627, Dev Accuracy 0.785740072202166


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 36, loss: 8.391305233115581
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4816533928951202,  Dev Precision 0.3438345914716561, Dev Recall 0.48153839311886965, Dev Accuracy 0.7842960288808664


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 37, loss: 8.371774203741728
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.48179006847796113,  Dev Precision 0.3436412396229372, Dev Recall 0.48272555824608016, Dev Accuracy 0.7839350180505414


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 38, loss: 8.353108299312307
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.48250640327153327,  Dev Precision 0.34450713877893674, Dev Recall 0.4827776415794135, Dev Accuracy 0.7850180505415162


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 39, loss: 8.334898749394203
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.481137263252137,  Dev Precision 0.3443921849494981, Dev Recall 0.48138812423401384, Dev Accuracy 0.7851985559566786


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 40, loss: 8.317069694177428
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.44932272144722746,  Dev Precision 0.3676163716010594, Dev Recall 0.4818047909006805, Dev Accuracy 0.7859205776173284


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 41, loss: 8.298955896007481
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4472076174142189,  Dev Precision 0.3413489405879554, Dev Recall 0.4783381877318368, Dev Accuracy 0.785379061371841


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 42, loss: 8.281729619894454
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.44937560105984414,  Dev Precision 0.3342660923792528, Dev Recall 0.47886183638048535, Dev Accuracy 0.785379061371841


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 43, loss: 8.26511207979117
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4459960605000325,  Dev Precision 0.3272936187512402, Dev Recall 0.4780694424998562, Dev Accuracy 0.7841155234657039


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 44, loss: 8.24928850202418
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4476219064982737,  Dev Precision 0.32869582964708355, Dev Recall 0.4816408710712848, Dev Accuracy 0.7837545126353789


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 45, loss: 8.234140154141098
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4501906551836776,  Dev Precision 0.33658725207010287, Dev Recall 0.48372420440461805, Dev Accuracy 0.7839350180505414


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 46, loss: 8.219479240588289
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.44856882385318686,  Dev Precision 0.3324081294272344, Dev Recall 0.4824421531225668, Dev Accuracy 0.7841155234657038


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 47, loss: 8.20523671249845
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.45075213356402805,  Dev Precision 0.33773766681717315, Dev Recall 0.482992385410604, Dev Accuracy 0.7842960288808664


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 48, loss: 8.19164318824882
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4567427289040233,  Dev Precision 0.34584032912732543, Dev Recall 0.4890638139820326, Dev Accuracy 0.7851985559566786


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 49, loss: 8.17845766579927
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4458306175863139,  Dev Precision 0.3526250149681466, Dev Recall 0.4930915917598104, Dev Accuracy 0.7864620938628157


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 50, loss: 8.16553913657345
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.44543049124810885,  Dev Precision 0.34801593445260515, Dev Recall 0.493059540477759, Dev Accuracy 0.7864620938628158


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 51, loss: 8.152922630310059
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.44590980118273216,  Dev Precision 0.349846358716957, Dev Recall 0.49302748919570777, Dev Accuracy 0.7871841155234657


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 52, loss: 8.140694212557664
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.43468733053102904,  Dev Precision 0.3883559754731707, Dev Recall 0.4995730556981709, Dev Accuracy 0.7873646209386281


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 53, loss: 8.128771411838816
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4336871234914539,  Dev Precision 0.3861649262012212, Dev Recall 0.4995730556981709, Dev Accuracy 0.7866425992779783


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 54, loss: 8.117183400623833
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.437252320080922,  Dev Precision 0.38873257871745615, Dev Recall 0.502073055698171, Dev Accuracy 0.787725631768953


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 55, loss: 8.105985620128575
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.435674843145765,  Dev Precision 0.3872181192495181, Dev Recall 0.5018326710827863, Dev Accuracy 0.7879061371841155


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 56, loss: 8.094947188647826
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.43605774246878576,  Dev Precision 0.387265613564919, Dev Recall 0.5018326710827863, Dev Accuracy 0.788086642599278


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 57, loss: 8.084195229544568
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.43852636507149445,  Dev Precision 0.38885507780075523, Dev Recall 0.5048043691959938, Dev Accuracy 0.7879061371841155


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 58, loss: 8.073614341109547
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4396920575979501,  Dev Precision 0.38955220072379865, Dev Recall 0.5066426044901114, Dev Accuracy 0.787725631768953


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 59, loss: 8.063319248939628
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4428901898716231,  Dev Precision 0.3905872700933381, Dev Recall 0.5075854924211459, Dev Accuracy 0.7882671480144404


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 60, loss: 8.053233196486287
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4427123915858099,  Dev Precision 0.3894007189376688, Dev Recall 0.5068414448020983, Dev Accuracy 0.7873646209386281


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 61, loss: 8.043164352872479
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4451477761855118,  Dev Precision 0.37389154044201783, Dev Recall 0.5117149567068602, Dev Accuracy 0.7875451263537906


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 62, loss: 8.033467484943902
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.44740606278501316,  Dev Precision 0.3774144364811687, Dev Recall 0.5143549319452413, Dev Accuracy 0.7879061371841155


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 63, loss: 8.023941758853287
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.44739703477325915,  Dev Precision 0.37805750958514805, Dev Recall 0.514161917239359, Dev Accuracy 0.787725631768953


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 64, loss: 8.014434195276516
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.44704066564834066,  Dev Precision 0.3778308015629938, Dev Recall 0.514161917239359, Dev Accuracy 0.7871841155234657


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 65, loss: 8.005265968949047
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.44543971184149755,  Dev Precision 0.3753915613764367, Dev Recall 0.513205043923995, Dev Accuracy 0.7861010830324909


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 66, loss: 7.996195152624329
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4482782567533656,  Dev Precision 0.37913660760299644, Dev Recall 0.5135302218906825, Dev Accuracy 0.7864620938628158


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 67, loss: 7.9873359523602385
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.447826789876187,  Dev Precision 0.37219854126714097, Dev Recall 0.513058523777475, Dev Accuracy 0.7859205776173286


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 68, loss: 7.978531460263836
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.44674549449934664,  Dev Precision 0.37044619883392715, Dev Recall 0.5119156385573492, Dev Accuracy 0.7853790613718412


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 69, loss: 7.9699963882787905
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.44751596815748107,  Dev Precision 0.3734966572630555, Dev Recall 0.5109677499679511, Dev Accuracy 0.7859205776173286


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 70, loss: 7.961589621074164
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4497520739065065,  Dev Precision 0.37687558934665477, Dev Recall 0.5114252010057818, Dev Accuracy 0.7857400722021661


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 71, loss: 7.953216972635754
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4514406753980967,  Dev Precision 0.3790606895090415, Dev Recall 0.5123730895951799, Dev Accuracy 0.7868231046931409


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 72, loss: 7.9450438058198385
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4546257280188679,  Dev Precision 0.38183353532393555, Dev Recall 0.515350175639422, Dev Accuracy 0.7871841155234658


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 73, loss: 7.937038058665261
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4549126247762992,  Dev Precision 0.3816521045013462, Dev Recall 0.515350175639422, Dev Accuracy 0.7870036101083032


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 74, loss: 7.929104022125699
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4530725402166066,  Dev Precision 0.38012239311358, Dev Recall 0.5143085089727553, Dev Accuracy 0.7866425992779783


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 75, loss: 7.921311143618911
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4561886751843069,  Dev Precision 0.3836926171574827, Dev Recall 0.5155564564275992, Dev Accuracy 0.7873646209386282


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 76, loss: 7.913742563617763
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4593452260953299,  Dev Precision 0.3822374860003501, Dev Recall 0.5185305943586338, Dev Accuracy 0.7879061371841156


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 77, loss: 7.906209084525037
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4597096586720646,  Dev Precision 0.3824872839796571, Dev Recall 0.5199250619599979, Dev Accuracy 0.7888086642599278


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 78, loss: 7.898845992871185
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4612114687135442,  Dev Precision 0.3829747438228331, Dev Recall 0.5219542594148419, Dev Accuracy 0.7879061371841156


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 79, loss: 7.891445124327247
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4619180696047331,  Dev Precision 0.38348278407489056, Dev Recall 0.5215673546529371, Dev Accuracy 0.7879061371841156


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 80, loss: 7.884187918990405
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46140439760528584,  Dev Precision 0.38314851735962363, Dev Recall 0.5209575985553762, Dev Accuracy 0.7873646209386282


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 81, loss: 7.87707210654643
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4638668397961861,  Dev Precision 0.38325177904184426, Dev Recall 0.5254154898020607, Dev Accuracy 0.7877256317689532


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 82, loss: 7.870086143265909
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4637578484958015,  Dev Precision 0.3827290036930816, Dev Recall 0.52513838624219, Dev Accuracy 0.7871841155234658


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 83, loss: 7.8632927510275765
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4641325674038431,  Dev Precision 0.38276864088180496, Dev Recall 0.5265669576707614, Dev Accuracy 0.7870036101083033


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 84, loss: 7.856499842743375
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4640818652640327,  Dev Precision 0.3820807168905009, Dev Recall 0.5270523945639652, Dev Accuracy 0.7870036101083033


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 85, loss: 7.849797362711892
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4644516498603887,  Dev Precision 0.38319196998860605, Dev Recall 0.5270523945639652, Dev Accuracy 0.7870036101083033


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 86, loss: 7.843192904742796
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46465276251814647,  Dev Precision 0.3834535038707026, Dev Recall 0.5276406298580829, Dev Accuracy 0.7871841155234658


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 87, loss: 7.836672825599784
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4638473155809487,  Dev Precision 0.38060073469285605, Dev Recall 0.5276406298580829, Dev Accuracy 0.7866425992779784


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 88, loss: 7.830319689280951
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4652781420490769,  Dev Precision 0.3811625153043494, Dev Recall 0.5293647677891175, Dev Accuracy 0.786462093862816


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 89, loss: 7.82411025887105
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46674879742290826,  Dev Precision 0.38109112427880343, Dev Recall 0.5317689371666927, Dev Accuracy 0.786462093862816


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 90, loss: 7.817895889282227
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4672182449021878,  Dev Precision 0.37980479266622214, Dev Recall 0.5317689371666927, Dev Accuracy 0.7868231046931408


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 91, loss: 7.811822154628697
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46674182783802304,  Dev Precision 0.37968151462429145, Dev Recall 0.5312972390534851, Dev Accuracy 0.7866425992779784


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 92, loss: 7.805787570440947
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46584656144199704,  Dev Precision 0.3797373730241482, Dev Recall 0.5277258104820566, Dev Accuracy 0.7870036101083032


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 93, loss: 7.799825372980602
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46600162497728004,  Dev Precision 0.379939586261829, Dev Recall 0.52777789381539, Dev Accuracy 0.7870036101083032


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 94, loss: 7.793973712778803
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4651842958820954,  Dev Precision 0.37940610672001324, Dev Recall 0.5273017033391996, Dev Accuracy 0.7864620938628158


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 95, loss: 7.788132389979576
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46627424453375826,  Dev Precision 0.37977628536426383, Dev Recall 0.5285517033391995, Dev Accuracy 0.7870036101083032


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 96, loss: 7.782441694345048
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46708054817476297,  Dev Precision 0.38098123001690387, Dev Recall 0.5285517033391995, Dev Accuracy 0.7873646209386281


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 97, loss: 7.776741276926069
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46790011409459914,  Dev Precision 0.38205776602463, Dev Recall 0.5295933700058661, Dev Accuracy 0.7875451263537906


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 98, loss: 7.771262656396894
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.466774402893249,  Dev Precision 0.3799937371097916, Dev Recall 0.5278941222143854, Dev Accuracy 0.787364620938628


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 99, loss: 7.765701098228568
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46908810237035165,  Dev Precision 0.38302233006672853, Dev Recall 0.529404538881052, Dev Accuracy 0.7879061371841154


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 100, loss: 7.760402608273634
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46944783171552495,  Dev Precision 0.38321184553921617, Dev Recall 0.5300870101454198, Dev Accuracy 0.7880866425992779


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 101, loss: 7.755003900670293
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.47273917474234894,  Dev Precision 0.3863151361760027, Dev Recall 0.532852814743121, Dev Accuracy 0.7886281588447652


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 102, loss: 7.749753300823382
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4722713077149189,  Dev Precision 0.3849838511512252, Dev Recall 0.532852814743121, Dev Accuracy 0.7884476534296028


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 103, loss: 7.744605487851954
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.47206408234698394,  Dev Precision 0.3850546402810606, Dev Recall 0.5320194814097877, Dev Accuracy 0.7886281588447652


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 104, loss: 7.739478940394387
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4720372505616382,  Dev Precision 0.38451088578993764, Dev Recall 0.5320194814097877, Dev Accuracy 0.7886281588447652


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 105, loss: 7.734394770949634
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.47144062368914924,  Dev Precision 0.3826026477500015, Dev Recall 0.5320194814097877, Dev Accuracy 0.7884476534296028


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 106, loss: 7.729411196352831
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4714536482600457,  Dev Precision 0.38150849019755484, Dev Recall 0.5324911795229952, Dev Accuracy 0.7884476534296028


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 107, loss: 7.724587753637513
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.47255345002015947,  Dev Precision 0.382431518215448, Dev Recall 0.5339197509515666, Dev Accuracy 0.7891696750902526


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 108, loss: 7.719753813387743
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4732800540102112,  Dev Precision 0.3833579987725103, Dev Recall 0.534406634894333, Dev Accuracy 0.7895306859205775


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 109, loss: 7.7150095363161455
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.47286274526196037,  Dev Precision 0.38264571328470826, Dev Recall 0.534406634894333, Dev Accuracy 0.7891696750902526


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 110, loss: 7.71031783587897
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4732725855694741,  Dev Precision 0.3828857870236432, Dev Recall 0.5352469710287868, Dev Accuracy 0.7891696750902526


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 111, loss: 7.705648408007266
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4732530737364753,  Dev Precision 0.38321172105548096, Dev Recall 0.5351480594952623, Dev Accuracy 0.7888086642599277


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 112, loss: 7.701164128175423
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4719379320016568,  Dev Precision 0.3818914033697074, Dev Recall 0.533796708143911, Dev Accuracy 0.7882671480144403


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 113, loss: 7.696665176704748
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.47172123322406173,  Dev Precision 0.38181889056821156, Dev Recall 0.53318695204635, Dev Accuracy 0.7882671480144403


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 114, loss: 7.692207763444132
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4714136460743693,  Dev Precision 0.3816558738359281, Dev Recall 0.532577195948789, Dev Accuracy 0.7880866425992779


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 115, loss: 7.687876242310254
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4717936438258571,  Dev Precision 0.3820019751314348, Dev Recall 0.5331334257222518, Dev Accuracy 0.7882671480144403


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 116, loss: 7.683530408944657
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4704748717423012,  Dev Precision 0.3811007577167946, Dev Recall 0.5312048993981535, Dev Accuracy 0.7875451263537905


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 117, loss: 7.67921507892324
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.471741105508535,  Dev Precision 0.3818130084290453, Dev Recall 0.5329290373291881, Dev Accuracy 0.7877256317689529


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 118, loss: 7.6751384592767975
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4708131785357891,  Dev Precision 0.3814872669550017, Dev Recall 0.5315776859778367, Dev Accuracy 0.787364620938628


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 119, loss: 7.671025158754036
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4705088547760889,  Dev Precision 0.3812752596038092, Dev Recall 0.5307964359778368, Dev Accuracy 0.787364620938628


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 120, loss: 7.66697359085083
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4697769353624489,  Dev Precision 0.3798128267757366, Dev Recall 0.5302082006837191, Dev Accuracy 0.7868231046931407


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 121, loss: 7.662961077334276
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46970319348864653,  Dev Precision 0.3793692855227141, Dev Recall 0.5302082006837191, Dev Accuracy 0.7868231046931407


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 122, loss: 7.6590388212631
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46940098178360257,  Dev Precision 0.3794196367451168, Dev Recall 0.5297365025705115, Dev Accuracy 0.7870036101083031


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 123, loss: 7.655143937068199
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46941523796563384,  Dev Precision 0.37895753478731187, Dev Recall 0.5307781692371781, Dev Accuracy 0.7868231046931407


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 124, loss: 7.6513775142271125
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46969780684174334,  Dev Precision 0.37879817855219206, Dev Recall 0.5317210571682127, Dev Accuracy 0.7864620938628158


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 125, loss: 7.647567439435133
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4692211302051808,  Dev Precision 0.37862017752436306, Dev Recall 0.5302924857396413, Dev Accuracy 0.7866425992779783


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 126, loss: 7.643837576481833
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46552056520776447,  Dev Precision 0.36331387471886606, Dev Recall 0.5281771011242565, Dev Accuracy 0.7859205776173285


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 127, loss: 7.640158091018449
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46837185190081004,  Dev Precision 0.3488928247999685, Dev Recall 0.5293426152253977, Dev Accuracy 0.7855595667870034


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 128, loss: 7.636558369024476
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4667348715362902,  Dev Precision 0.3479269060403282, Dev Recall 0.526899190921714, Dev Accuracy 0.7850180505415161


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 129, loss: 7.633029923510196
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4668601294201687,  Dev Precision 0.3480548795422855, Dev Recall 0.526899190921714, Dev Accuracy 0.7851985559566785


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 130, loss: 7.629624409462089
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4694275504280266,  Dev Precision 0.3493999926478554, Dev Recall 0.5281812422037653, Dev Accuracy 0.7851985559566785


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 131, loss: 7.62612899381723
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4702755524330115,  Dev Precision 0.3496670313177066, Dev Recall 0.5294312422037654, Dev Accuracy 0.785379061371841


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 132, loss: 7.622841756735275
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4706880086680535,  Dev Precision 0.34961339144921577, Dev Recall 0.5299029403169728, Dev Accuracy 0.7855595667870034


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 133, loss: 7.619559248881553
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46998695217342706,  Dev Precision 0.3490293518564437, Dev Recall 0.5294312422037654, Dev Accuracy 0.785379061371841


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 134, loss: 7.616309514686243
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4703147039811492,  Dev Precision 0.34876053485496017, Dev Recall 0.5299029403169728, Dev Accuracy 0.7857400722021659


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 135, loss: 7.613136622443128
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.47071657846864456,  Dev Precision 0.34891267982731283, Dev Recall 0.5303746384301804, Dev Accuracy 0.7861010830324908


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 136, loss: 7.609976722233331
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4695841586155722,  Dev Precision 0.3482410752416667, Dev Recall 0.529293184219412, Dev Accuracy 0.7855595667870034


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 137, loss: 7.606946119621618
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46859343545354015,  Dev Precision 0.3476742028282623, Dev Recall 0.5273183379905031, Dev Accuracy 0.7851985559566785


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 138, loss: 7.603964976410367
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.468551252659209,  Dev Precision 0.34858797389473073, Dev Recall 0.5258897665619318, Dev Accuracy 0.7861010830324908


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 139, loss: 7.600919431714869
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4682294208996006,  Dev Precision 0.34842678938036775, Dev Recall 0.5250564332285984, Dev Accuracy 0.7861010830324908


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 140, loss: 7.598057177529406
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46878842711078067,  Dev Precision 0.3497051984712769, Dev Recall 0.5250564332285984, Dev Accuracy 0.7862815884476533


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 141, loss: 7.595178013417258
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46778981265433306,  Dev Precision 0.3483322763935684, Dev Recall 0.5242751832285985, Dev Accuracy 0.7861010830324908


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 142, loss: 7.592311713232923
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4681484252950704,  Dev Precision 0.34920446474268624, Dev Recall 0.5242751832285985, Dev Accuracy 0.7861010830324908


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 143, loss: 7.589535143838
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46783238217712036,  Dev Precision 0.349194868112518, Dev Recall 0.5232015110412769, Dev Accuracy 0.7861010830324908


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 144, loss: 7.586899273431123
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46704526473255586,  Dev Precision 0.34889855373711376, Dev Recall 0.5219485629280693, Dev Accuracy 0.7859205776173284


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 145, loss: 7.584306040806557
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4672644204484744,  Dev Precision 0.3491971006476115, Dev Recall 0.5219485629280693, Dev Accuracy 0.7862815884476533


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 146, loss: 7.5817015598069375
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4682188023700692,  Dev Precision 0.3493523754425539, Dev Recall 0.5236727008591039, Dev Accuracy 0.7864620938628157


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 147, loss: 7.57916914527096
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4681689073895126,  Dev Precision 0.34945218643953685, Dev Recall 0.5236727008591039, Dev Accuracy 0.7866425992779782


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 148, loss: 7.576689357188211
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46762907893042405,  Dev Precision 0.3490409313253302, Dev Recall 0.5228914508591039, Dev Accuracy 0.7862815884476533


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 149, loss: 7.574348029805653
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4681186855573428,  Dev Precision 0.34962491088178727, Dev Recall 0.5228914508591039, Dev Accuracy 0.7868231046931407


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 150, loss: 7.5719556594962505
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4681186855573428,  Dev Precision 0.34962491088178727, Dev Recall 0.5228914508591039, Dev Accuracy 0.7866425992779782


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 151, loss: 7.569646458127606
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4674577891178928,  Dev Precision 0.348354591484355, Dev Recall 0.5224060139659, Dev Accuracy 0.7866425992779782


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 152, loss: 7.5673830971789
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4673665582952181,  Dev Precision 0.34811838906084525, Dev Recall 0.5224060139659, Dev Accuracy 0.7868231046931407


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 153, loss: 7.565248375508323
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.467383045832168,  Dev Precision 0.3483158810418166, Dev Recall 0.5213643472992333, Dev Accuracy 0.787364620938628


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 154, loss: 7.563106309122114
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46750218625856793,  Dev Precision 0.3487058660667043, Dev Recall 0.5213643472992333, Dev Accuracy 0.7877256317689529


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 155, loss: 7.561008439135196
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4682516555845285,  Dev Precision 0.3499986104082727, Dev Recall 0.5213643472992333, Dev Accuracy 0.7888086642599277


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 156, loss: 7.558973123778158
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4680755064936153,  Dev Precision 0.3499519024046829, Dev Recall 0.5213643472992333, Dev Accuracy 0.7888086642599277


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 157, loss: 7.556915966432486
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46798808289756033,  Dev Precision 0.350011601171455, Dev Recall 0.5207761120051158, Dev Accuracy 0.7889891696750901


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 158, loss: 7.555026008122003
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46774061656216237,  Dev Precision 0.3497895642260429, Dev Recall 0.5202906751119117, Dev Accuracy 0.7889891696750901


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 159, loss: 7.5531243317162815
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4680059545695621,  Dev Precision 0.3501346603944923, Dev Recall 0.5202906751119117, Dev Accuracy 0.7893501805054151


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 160, loss: 7.551274513130758
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4677592438300975,  Dev Precision 0.35030955833035454, Dev Recall 0.5182073417785784, Dev Accuracy 0.7898916967509024


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 161, loss: 7.549421669831917
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46733071609180515,  Dev Precision 0.3501734339976722, Dev Recall 0.5174260917785785, Dev Accuracy 0.7898916967509024


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 162, loss: 7.547657696168814
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4676801841618522,  Dev Precision 0.3506704819832991, Dev Recall 0.5174260917785785, Dev Accuracy 0.7904332129963898


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 163, loss: 7.545933556200853
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4670962520147957,  Dev Precision 0.3503239956028203, Dev Recall 0.5163844251119117, Dev Accuracy 0.7902527075812273


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 164, loss: 7.544204117646858
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4670962520147957,  Dev Precision 0.3503239956028203, Dev Recall 0.5163844251119117, Dev Accuracy 0.7902527075812273


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 165, loss: 7.54250774098866
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46738610858850416,  Dev Precision 0.3507894425224417, Dev Recall 0.5162723802939846, Dev Accuracy 0.7904332129963898


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 166, loss: 7.540775284838321
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46738610858850416,  Dev Precision 0.3507894425224417, Dev Recall 0.5162723802939846, Dev Accuracy 0.7902527075812273


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 167, loss: 7.539134399214787
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46737665827482655,  Dev Precision 0.3510580299259813, Dev Recall 0.5157961898177941, Dev Accuracy 0.7904332129963898


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 168, loss: 7.537475575262041
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46737665827482655,  Dev Precision 0.3510580299259813, Dev Recall 0.5157961898177941, Dev Accuracy 0.7904332129963898


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 169, loss: 7.5358364297382865
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4675089617862332,  Dev Precision 0.3512656384996543, Dev Recall 0.5157961898177941, Dev Accuracy 0.7906137184115523


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 170, loss: 7.534191380685835
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46765788211385795,  Dev Precision 0.35150487294941507, Dev Recall 0.5157961898177941, Dev Accuracy 0.7907942238267147


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 171, loss: 7.532635083839075
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46765788211385795,  Dev Precision 0.35150487294941507, Dev Recall 0.5157961898177941, Dev Accuracy 0.7907942238267147


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 172, loss: 7.531079431078327
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46765788211385795,  Dev Precision 0.35150487294941507, Dev Recall 0.5157961898177941, Dev Accuracy 0.7907942238267147


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 173, loss: 7.5295486379025585
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4677913116125957,  Dev Precision 0.3517156271075376, Dev Recall 0.5157961898177941, Dev Accuracy 0.7909747292418772


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 174, loss: 7.528088356131938
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46796256140246023,  Dev Precision 0.3518732004458999, Dev Recall 0.5157961898177941, Dev Accuracy 0.7909747292418772


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 175, loss: 7.526650902050645
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.468817824560355,  Dev Precision 0.35240136945998446, Dev Recall 0.5178795231511275, Dev Accuracy 0.7913357400722021


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 176, loss: 7.525293606430737
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4688226699337223,  Dev Precision 0.3522810331253778, Dev Recall 0.5178795231511275, Dev Accuracy 0.7911552346570396


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 177, loss: 7.5239443138464175
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4686458602397229,  Dev Precision 0.3522644005694656, Dev Recall 0.517403332674937, Dev Accuracy 0.7911552346570396


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 178, loss: 7.522649163630471
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4686118666021333,  Dev Precision 0.3519005529027393, Dev Recall 0.517403332674937, Dev Accuracy 0.7911552346570396


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 179, loss: 7.521403558218657
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46924073395878435,  Dev Precision 0.35163636105901064, Dev Recall 0.5196131541035085, Dev Accuracy 0.7913357400722021


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 180, loss: 7.520224158443622
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46822719011036995,  Dev Precision 0.35076851362443573, Dev Recall 0.518779820770175, Dev Accuracy 0.7904332129963898


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 181, loss: 7.519048274453007
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4686989766337654,  Dev Precision 0.35118721221373, Dev Recall 0.5198214874368416, Dev Accuracy 0.7906137184115523


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 182, loss: 7.517944933763191
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46873772142348613,  Dev Precision 0.3513172913751376, Dev Recall 0.5198214874368416, Dev Accuracy 0.7906137184115523


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 183, loss: 7.516900069678008
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4682720079364067,  Dev Precision 0.35101354698606746, Dev Recall 0.5193452969606512, Dev Accuracy 0.7900722021660649


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 184, loss: 7.515919749416522
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4671572252505391,  Dev Precision 0.3498109325928092, Dev Recall 0.5193452969606512, Dev Accuracy 0.7893501805054151


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 185, loss: 7.514965363402865
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46715959996144785,  Dev Precision 0.35004593784951976, Dev Recall 0.5193452969606512, Dev Accuracy 0.7893501805054151


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 186, loss: 7.514098274174021
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46769069192236745,  Dev Precision 0.35060088237239234, Dev Recall 0.5193452969606512, Dev Accuracy 0.78971119133574


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 187, loss: 7.513280256470638
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4686711238455021,  Dev Precision 0.35105941461310153, Dev Recall 0.5206966483120026, Dev Accuracy 0.7900722021660649


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 188, loss: 7.5124696226262335
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4686711238455021,  Dev Precision 0.35105941461310153, Dev Recall 0.5206966483120026, Dev Accuracy 0.7900722021660649


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 189, loss: 7.511697452459762
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46892062616799707,  Dev Precision 0.351224954201718, Dev Recall 0.5206966483120026, Dev Accuracy 0.7902527075812273


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 190, loss: 7.51094919176244
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46945314421496914,  Dev Precision 0.351864276675988, Dev Recall 0.5203915887881931, Dev Accuracy 0.7909747292418772


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 191, loss: 7.510222260631732
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46900539497295873,  Dev Precision 0.35148912310005337, Dev Recall 0.5199061518949891, Dev Accuracy 0.7906137184115523


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 192, loss: 7.50951142097587
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46911012894866044,  Dev Precision 0.3516291007036368, Dev Recall 0.5199061518949891, Dev Accuracy 0.7907942238267147


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 193, loss: 7.508794941119294
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.468703504519683,  Dev Precision 0.3513553143970918, Dev Recall 0.5194299614187987, Dev Accuracy 0.7904332129963898


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 194, loss: 7.508073322808564
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4692003182947209,  Dev Precision 0.3515559311480344, Dev Recall 0.5202112114187987, Dev Accuracy 0.7906137184115523


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 195, loss: 7.507357355374009
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4684177843985968,  Dev Precision 0.3509145149929055, Dev Recall 0.5197395133055911, Dev Accuracy 0.7902527075812273


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 196, loss: 7.506658781820269
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.469022716490613,  Dev Precision 0.3514009353171118, Dev Recall 0.5202249501987951, Dev Accuracy 0.7906137184115523


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 197, loss: 7.505998031416936
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4686814933739483,  Dev Precision 0.35092388095979143, Dev Recall 0.5202249501987951, Dev Accuracy 0.7902527075812273


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 198, loss: 7.50537089447477
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.4691649295542019,  Dev Precision 0.35159052999234297, Dev Recall 0.5202249501987951, Dev Accuracy 0.7909747292418772


  0%|          | 0/67 [00:00<?, ?it/s]

epoch 199, loss: 7.504803629063848
Evaluating dev...


  0%|          | 0/5 [00:00<?, ?it/s]

Dev F1 0.46910340774597376,  Dev Precision 0.35188616711409243, Dev Recall 0.5197532520855874, Dev Accuracy 0.7911552346570396


In [58]:
torch.save(model, 'BaseClassifier.pt')

In [58]:
model = torch.load('BaseClassifier.pt')

In [59]:
print("Evaluating dev...")
all_preds = []
all_labels = []

test_input_batches = [b for b in chunk_multi(test_premises, test_conclusion, test_stance, batch_size)]
# Tokenize + encode
test_input_batches = [tokenizer(*batch) for batch in test_input_batches]


test_label_batches = [b for b in chunk(test_labels, batch_size)]
test_label_batches = [encode_labels(batch) for batch in test_label_batches]
for sents, labels in tqdm(zip(test_input_batches, test_label_batches), total=len(test_input_batches)):
    pred = predict(model, sents)
    all_preds.extend(pred)
    all_labels.extend(list(labels.numpy()))
# #print(range(len(set(train_labels))))

test_f1, test_P, test_R, test_acc, test_all_f1, test_all_P, test_all_R, test_all_acc = f1Score_multiLabel(all_preds, all_labels)
print(f"test F1 {test_f1},  test Precision {test_P}, test Recall {test_R}, test Accuracy {test_acc}")

with open("all_f1_test_base.csv", 'ab') as abc:
    np.savetxt(abc, 
           test_all_f1,
           delimiter =", ", 
           fmt ='%s')

Evaluating dev...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for sents, labels in tqdm(zip(test_input_batches, test_label_batches), total=len(test_input_batches)):


  0%|          | 0/12 [00:00<?, ?it/s]

test F1 0.3561952484074856,  test Precision 0.2850124708417591, test Recall 0.4797666885959394, test Accuracy 0.7612217795484727
