In [1]:
from utility import SharedVars, HW2Params34, HW2Dataset12, HW2Dataset34, HW2Params12
from hw2.utils import read_dataset
import hw2.utils as their_utils
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
from typing import Optional, Union, List, Dict, Any
from typing import Tuple, Dict
import transformers_embedder as tre
import os
from collections import OrderedDict

os.environ["TOKENIZERS_PARALLELISM"] = "false"

Loaded vars


In [2]:
#logger imports
from pytorch_lightning.loggers import WandbLogger

wandb_logger = WandbLogger(project="nlp-hw2")

[34m[1mwandb[0m: Currently logged in as: [33malessiorl[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
!nvidia-smi

Thu Jul  7 06:26:34 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.48.07    Driver Version: 515.48.07    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:09:00.0  On |                  N/A |
|  0%   47C    P8    14W / 151W |    444MiB /  8192MiB |     46%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [4]:
mainpath = os.path.join("..", "..")
datapath = os.path.join(mainpath, "data")
en_path = os.path.join(datapath, "EN")
es_path = os.path.join(datapath, "ES")
fr_path = os.path.join(datapath, "FR")
dev_name = "dev.json"
train_name = "train.json"

In [5]:
os.listdir(datapath)

['ES', 'baselines.json', 'EN', 'FR', '.placeholder']

In [6]:
from utilitymine import HW2Dataset34 as HW2Dataset


class HW2DataModule(pl.LightningDataModule):
    def __init__(self, data_train_path: str, data_dev_path: str, batch_size: int, data_test_path: str = None) -> None:
        super().__init__()
        self.data_train_path = data_train_path
        self.data_dev_path = data_dev_path
        self.data_test_path = data_test_path
        self.batch_size = batch_size

        self.train_dataset = None
        self.validation_dataset = None
        self.test_dataset = None

    def setup(self, stage: Optional[str] = None) -> None:
        if stage == 'fit':
            self.train_dataset = HW2Dataset(*read_dataset(self.data_train_path))
            self.validation_dataset = HW2Dataset(*read_dataset(self.data_dev_path))
        elif stage == 'test':
            self.test_dataset = HW2Dataset(*read_dataset(self.data_test_path))

    def train_dataloader(self, *args, **kwargs) -> DataLoader:
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True,
                          collate_fn=HW2Dataset.collate_fn,
                          num_workers=os.cpu_count())

    def val_dataloader(self, *args, **kwargs) -> Union[DataLoader, List[DataLoader]]:
        return DataLoader(self.validation_dataset, batch_size=self.batch_size, shuffle=False,
                          collate_fn=HW2Dataset.collate_fn, num_workers=os.cpu_count())

    def test_dataloader(self, *args, **kwargs) -> Union[DataLoader, List[DataLoader]]:
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False,
                          collate_fn=HW2Dataset.collate_fn,
                          num_workers=os.cpu_count())

In [7]:
def eval34(predictions, labels):
    argument_identification_results = their_utils.evaluate_argument_identification(labels, predictions)
    argument_classification_results = their_utils.evaluate_argument_classification(labels, predictions)
    f1_i, f1_c = argument_identification_results["f1"], argument_classification_results["f1"]
    return f1_i, f1_c

In [8]:
def eval12(predictions, labels):
    predicate_identification_results = their_utils.evaluate_predicate_identification(labels, predictions)
    predicate_disambiguation_results = their_utils.evaluate_predicate_disambiguation(labels, predictions)
    return predicate_identification_results["f1"], predicate_disambiguation_results["f1"]

In [9]:
class StudentModel12(pl.LightningModule):

    def __init__(self, language: str, params: HW2Params12, eval_type: str):
        super().__init__()
        # load the specific model for the input language
        self.language = language
        self.params = params

        # EMBEDDING LAYERS
        self.word_embedder = tre.TransformersEmbedder(params.language_model_name, subword_pooling_strategy="scatter",
                                                      layer_pooling_strategy="mean", fine_tune=self.params.fine_tune)

        combined_len = self.word_embedder.hidden_size

        self.lstm = nn.LSTM(input_size=combined_len, hidden_size=self.params.lstm_hidden_dim,
                            num_layers=self.params.lstm_layers, bidirectional=self.params.bidir,
                            dropout=self.params.lstm_dropout if self.params.lstm_layers > 1 else 0, batch_first=True)
        # Last layers

        linears = [("lin1",
                    torch.nn.Linear(self.params.lstm_hidden_dim * (2 if self.params.bidir else 1), self.params.hidden)),
                   ("droput", torch.nn.Dropout(self.params.dropout)), ("activation", torch.nn.ReLU()), ]

        linears1 = [("lin1", torch.nn.Linear(self.params.hidden, 3)), ]

        linears2 = [("lin1", torch.nn.Linear(self.params.hidden, self.params.n_classes12)), ]
        self.dual = nn.Sequential(OrderedDict(linears))
        self.classificator1 = nn.Sequential(OrderedDict(linears1))

        self.classificator2 = nn.Sequential(OrderedDict(linears2))

        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.params.learning_rate,
                                          weight_decay=self.params.weight_decay)

        self.loss_fn_c = torch.nn.CrossEntropyLoss(ignore_index=SharedVars.pred2Index["<PAD>"])
        self.loss_fn_i = torch.nn.CrossEntropyLoss(ignore_index=2)

        wandb_logger.experiment.config.update(self.params.gethyperparameterdict())


    def forward(self, x: dict, y: Optional[dict] = None) -> Dict[str, torch.Tensor]:
        inputs = x["words"]
        out = self.word_embedder(**inputs)
        word_embedding = out.word_embeddings
        lstm_out, _ = self.lstm(word_embedding)
        out = self.dual(lstm_out)
        out1 = self.classificator1(out)
        out2 = self.classificator2(out)

        out1 = out1.permute(0, 2, 1)
        logits1 = torch.softmax(out1, dim=1)

        out2 = out2.permute(0, 2, 1)
        logits2 = torch.softmax(out2, dim=1)

        result = {'logits2': logits2, 'pred2': torch.argmax(logits2, dim=1), 'logits1': logits1,
                  'pred1': torch.argmax(logits1, dim=1)}

        # compute loss
        if y is not None:
            labels = y["labels"]
            labels1 = y["labels1"]
            # while mathematically the CrossEntropyLoss takes as input the probability distributions,
            # torch optimizes its computation internally and takes as input the logits instead
            loss = self.loss(out2, labels, out1, labels1)
            result['loss'] = loss

        return result

    def loss(self, pred, y, predi, yi):
        loss_1 = self.loss_fn_i(predi, yi)
        loss_2 = self.loss_fn_c(pred, y)
        return (loss_1 + (loss_2 * 9)) / 10

    def configure_optimizers(self):
        return self.optimizer

    def training_step(self, batch: Tuple[torch.Tensor], batch_idx: int) -> torch.Tensor:
        forward_output = self.forward(*batch)

        self.log('train_loss', forward_output['loss'], prog_bar=False, batch_size=self.params.batch_size)

        return forward_output['loss']

    def validation_step(self, batch: Tuple[torch.Tensor], batch_idx: int):
        sentences_len = batch[0]["words"]["sentence_lengths"]
        forward_output = self.forward(*batch)
        pred = dict()
        label = dict()
        for i, v in enumerate(batch[1]["sentence_ids"]):
            cls_index_shift = 1 if batch[0]["bool_cls"] else 0

            #also remove pad
            preds = HW2Dataset12.decode_labels(
                (forward_output["pred2"][i][cls_index_shift:sentences_len[i] - (2 - cls_index_shift)]).tolist())

            labels = HW2Dataset12.decode_labels(
                (batch[1]["labels"][i][cls_index_shift:sentences_len[i] - (2 - cls_index_shift)]).tolist())

            pred[v] = {"predicates": preds}
            label[v] = {"predicates": labels}
        try:
            f1_i, f1_c = eval12(pred, label)
        except ArithmeticError:
            f1_i = f1_c = 0.0

        self.log('val_f1_1', f1_i, prog_bar=False, batch_size=self.params.batch_size)
        self.log('val_f1_2', f1_c, prog_bar=True, batch_size=self.params.batch_size)
        self.log('val_loss', forward_output['loss'], prog_bar=True, batch_size=self.params.batch_size)

    def predict(self, sentence):
        """
        --> !!! STUDENT: implement here your predict function !!! <--

        Args:
                sentence: a dictionary that represents an input sentence, for example:
                        - If you are doing argument identification + argument classification:
                                {
                                        "words":
                                                [  "In",  "any",  "event",  ",",  "Mr.",  "Englund",  "and",  "many",  "others",  "say",  "that",  "the",  "easy",  "gains",  "in",  "narrowing",  "the",  "trade",  "gap",  "have",  "already",  "been",  "made",  "."  ]
                                        "lemmas":
                                                ["in", "any", "event", ",", "mr.", "englund", "and", "many", "others", "say", "that", "the", "easy", "gain", "in", "narrow", "the", "trade", "gap", "have", "already", "be", "make",  "."],
                                        "predicates":
                                                ["_", "_", "_", "_", "_", "_", "_", "_", "_", "AFFIRM", "_", "_", "_", "_", "_", "REDUCE_DIMINISH", "_", "_", "_", "_", "_", "_", "MOUNT_ASSEMBLE_PRODUCE", "_" ],
                                },
                        - If you are doing predicate disambiguation + argument identification + argument classification:
                                {
                                        "words": [...], # SAME AS BEFORE
                                        "lemmas": [...], # SAME AS BEFORE
                                        "predicates":
                                                [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0 ],
                                },
                        - If you are doing predicate identification + predicate disambiguation + argument identification + argument classification:
                                {
                                        "words": [...], # SAME AS BEFORE
                                        "lemmas": [...], # SAME AS BEFORE
                                        # NOTE: you do NOT have a "predicates" field here.
                                },

        Returns:
                A dictionary with your predictions:
                        - If you are doing argument identification + argument classification:
                                {
                                        "roles": list of lists, # A list of roles for each predicate in the sentence.
                                }
                        - If you are doing predicate disambiguation + argument identification + argument classification:
                                {
                                        "predicates": list, # A list with your predicted predicate senses, one for each token in the input sentence.
                                        "roles": dictionary of lists, # A list of roles for each pre-identified predicate (index) in the sentence.
                                }
                        - If you are doing predicate identification + predicate disambiguation + argument identification + argument classification:
                                {
                                        "predicates": list, # A list of predicate senses, one for each token in the sentence, null ("_") included.
                                        "roles": dictionary of lists, # A list of roles for each predicate (index) you identify in the sentence.
                                }
        """
        sentence_dataset = HW2Dataset12(sentence, hassentenceid=False)
        sentence_dataloader = DataLoader(sentence_dataset, batch_size=self.params.batch_size, shuffle=False,
                                         collate_fn=HW2Dataset12.collate_fn, num_workers=os.cpu_count())
        pred2 = dict()
        self.eval()
        with torch.no_grad():
            for batch in sentence_dataloader:
                sentences_len = batch[0]["words"]["sentence_lengths"]
                forward_output = self.forward(batch[0])
                for i, v in enumerate(batch[1]["sentence_ids"]):
                    cls_index_shift = 1 if batch[0]["bool_cls"] else 0
                    # also remove pad
                    preds2 = HW2Dataset12.decode_labels(
                        (forward_output["pred2"][i][cls_index_shift:sentences_len[i] - (2 - cls_index_shift)]).tolist())
                    pred2[v] = {"predicates": preds2}

        return pred2


class StudentModel34(pl.LightningModule):

    def __init__(self, language: str, params: HW2Params34, eval_type="34"):
        super().__init__()
        # load the specific model for the input language
        self.language = language
        self.params = params

        # EMBEDDING LAYERS
        self.word_embedder = tre.TransformersEmbedder(params.language_model_name, subword_pooling_strategy="scatter",
                                                      layer_pooling_strategy="mean", fine_tune=self.params.fine_tune)

        combined_len = (self.word_embedder.hidden_size * 2)

        self.lstm = nn.LSTM(input_size=combined_len, hidden_size=self.params.lstm_hidden_dim,
                            num_layers=self.params.lstm_layers, bidirectional=self.params.bidir,
                            dropout=self.params.lstm_dropout if self.params.lstm_layers > 1 else 0, batch_first=True)
        # Last layers

        linears = [("lin1",
                    torch.nn.Linear(self.params.lstm_hidden_dim * (2 if self.params.bidir else 1), self.params.hidden)),
                   ("activation", nn.ReLU()), ("lin2", torch.nn.Linear(self.params.hidden, self.params.n_classes34)), ]

        self.classificator = nn.Sequential(OrderedDict(linears))
        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.params.learning_rate,
                                          weight_decay=self.params.weight_decay)
        self.loss_fn = torch.nn.CrossEntropyLoss(ignore_index=SharedVars.semanticRoles2Index["<PAD>"])

        wandb_logger.experiment.config.update(self.params.gethyperparameterdict())


    def forward(self, x: dict, y: Optional[dict] = None) -> Dict[str, torch.Tensor]:
        inputs = x["words"]
        preds = x["p_indexs"]

        out = self.word_embedder(**inputs)

        word_embedding = out.word_embeddings

        pred_embedding = torch.unsqueeze(word_embedding[range(len(preds)), preds, :], dim=1)
        pred_embedding = pred_embedding.expand(-1, word_embedding.shape[1], -1)
        combined_embeddings = torch.cat((word_embedding, pred_embedding), 2)

        lstm_out, _ = self.lstm(combined_embeddings)

        out = self.classificator(lstm_out)

        out = out.permute(0, 2, 1)
        logits = torch.softmax(out, dim=1)

        result = {'logits': logits, 'pred': torch.argmax(logits, dim=1)}
        # compute loss
        if y is not None:
            labels = y["labels"]
            # while mathematically the CrossEntropyLoss takes as input the probability distributions,
            # torch optimizes its computation internally and takes as input the logits instead
            loss = self.loss(out, labels)
            result['loss'] = loss

        return result

    def loss(self, pred, y):
        return self.loss_fn(pred, y)

    def training_step(self, batch: Tuple[torch.Tensor], batch_idx: int) -> torch.Tensor:
        forward_output = self.forward(*batch)

        #self.log('train_loss_3', forward_output['loss'], prog_bar=True, batch_size=self.params.batch_size)
        self.log('train_loss_4', forward_output['loss'], prog_bar=False, batch_size=self.params.batch_size)

        return forward_output['loss']

    def validation_step(self, batch: Tuple[torch.Tensor], batch_idx: int):
        sentences_len = batch[0]["words"]["sentence_lengths"]
        forward_output = self.forward(*batch)
        pred = dict()
        roles = dict()
        for i, v in enumerate(batch[1]["sentence_ids"]):
            pred_index = batch[0]["p_indexs"][i]
            cls_index_shift = 1 if batch[0]["bool_cls"] else 0
            #also remove pad
            proles = HW2Dataset34.decode_labels(
                (forward_output["pred"][i][cls_index_shift:sentences_len[i] - (2 - cls_index_shift)]).tolist())

            lroles = HW2Dataset34.decode_labels(
                (batch[1]["labels"][i][cls_index_shift:sentences_len[i] - (2 - cls_index_shift)]).tolist())

            pred[v] = {"roles": {pred_index: proles}}
            roles[v] = {"roles": {pred_index: lroles}}
        try:
            f1_i, f1_c = eval34(pred, roles)
        except ArithmeticError:
            f1_i = f1_c = 0.0

        self.log('val_f1_3', f1_i, prog_bar=False, batch_size=self.params.batch_size)
        self.log('val_f1_4', f1_c, prog_bar=True, batch_size=self.params.batch_size)
        #self.log('val_loss_3', forward_output['loss'], prog_bar=True, batch_size=self.params.batch_size)
        self.log('val_loss_4', forward_output['loss'], prog_bar=True, batch_size=self.params.batch_size)

    def configure_optimizers(self):
        return self.optimizer

    def predict(self, sentence):
        """
        --> !!! STUDENT: implement here your predict function !!! <--

        Args:
                sentence: a dictionary that represents an input sentence, for example:
                        - If you are doing argument identification + argument classification:
                                {
                                        "words":
                                                [  "In",  "any",  "event",  ",",  "Mr.",  "Englund",  "and",  "many",  "others",  "say",  "that",  "the",  "easy",  "gains",  "in",  "narrowing",  "the",  "trade",  "gap",  "have",  "already",  "been",  "made",  "."  ]
                                        "lemmas":
                                                ["in", "any", "event", ",", "mr.", "englund", "and", "many", "others", "say", "that", "the", "easy", "gain", "in", "narrow", "the", "trade", "gap", "have", "already", "be", "make",  "."],
                                        "predicates":
                                                ["_", "_", "_", "_", "_", "_", "_", "_", "_", "AFFIRM", "_", "_", "_", "_", "_", "REDUCE_DIMINISH", "_", "_", "_", "_", "_", "_", "MOUNT_ASSEMBLE_PRODUCE", "_" ],
                                },
                        - If you are doing predicate disambiguation + argument identification + argument classification:
                                {
                                        "words": [...], # SAME AS BEFORE
                                        "lemmas": [...], # SAME AS BEFORE
                                        "predicates":
                                                [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0 ],
                                },
                        - If you are doing predicate identification + predicate disambiguation + argument identification + argument classification:
                                {
                                        "words": [...], # SAME AS BEFORE
                                        "lemmas": [...], # SAME AS BEFORE
                                        # NOTE: you do NOT have a "predicates" field here.
                                },

        Returns:
                A dictionary with your predictions:
                        - If you are doing argument identification + argument classification:
                                {
                                        "roles": list of lists, # A list of roles for each predicate in the sentence.
                                }
                        - If you are doing predicate disambiguation + argument identification + argument classification:
                                {
                                        "predicates": list, # A list with your predicted predicate senses, one for each token in the input sentence.
                                        "roles": dictionary of lists, # A list of roles for each pre-identified predicate (index) in the sentence.
                                }
                        - If you are doing predicate identification + predicate disambiguation + argument identification + argument classification:
                                {
                                        "predicates": list, # A list of predicate senses, one for each token in the sentence, null ("_") included.
                                        "roles": dictionary of lists, # A list of roles for each predicate (index) you identify in the sentence.
                                }
        """

        sentence_dataset = HW2Dataset34(sentence, hassentenceid=False)
        sentence_dataloader = DataLoader(sentence_dataset, batch_size=self.params.batch_size, shuffle=False,
                                         collate_fn=HW2Dataset34.collate_fn, num_workers=os.cpu_count())
        pred = dict()
        self.eval()

        with torch.no_grad():
            for batch in sentence_dataloader:
                sentences_len = batch[0]["words"]["sentence_lengths"]
                forward_output = self.forward(batch[0])

                for i, v in enumerate(batch[1]["sentence_ids"]):
                    pred_index = batch[0]["p_indexs"][i].item()
                    cls_index_shift = 1 if batch[0]["bool_cls"] else 0
                    proles = HW2Dataset34.decode_labels(
                        (forward_output["pred"][i][cls_index_shift:sentences_len[i] - (2 - cls_index_shift)]).tolist())
                    pred[pred_index] = proles

        return {"roles": pred}

class StudentModel1234(pl.LightningModule):

    def __init__(self, language: str, model12: StudentModel12, model34: StudentModel34):
        super().__init__()
        self.model12 = model12
        self.model34 = model34

    def predict(self, sentence):
        out12 = self.model12.predict(sentence)

        sentence["predicates"] = out12.popitem()[1]["predicates"]
        out34 = self.model34.predict(sentence)

        result = {"predicates": sentence["predicates"], "roles": out34["roles"]}

        return result

In [10]:
early_stopping = pl.callbacks.EarlyStopping(monitor='val_f1_4',
                                            # the value that will be evaluated to activate the early stopping of the model.
                                            patience=3,
                                            # the number of consecutive attempts that the model has to raise (or lower depending on the metric used) to raise the "monitor" value.
                                            verbose=True,  # whether to log or not information in the console.
                                            mode='max',
                                            # wheter we want to maximize (max) or minimize the "monitor" value.
                                            )

check_point_callback = pl.callbacks.ModelCheckpoint(monitor='val_f1_4',
                                                    # the value that we want to use for model selection.
                                                    verbose=True,  # whether to log or not information in the console.
                                                    save_top_k=3,  # the number of checkpoints we want to store.
                                                    mode='max',
                                                    # wheter we want to maximize (max) or minimize the "monitor" value.
                                                    dirpath=os.path.join(mainpath, 'experiments/hw2models'),
                                                    # output directory path
                                                    filename=wandb_logger.experiment.name + '-{epoch}-{val_f1_4:.4f}'
                                                    # the prefix on the checkpoint values. Metrics store by the trainer can be used to dynamically change the name.
                                                    )

hw2_dm = HW2DataModule(data_train_path=os.path.join(en_path, train_name), data_dev_path=os.path.join(en_path, dev_name),
                       data_test_path=os.path.join(en_path, dev_name), batch_size=HW2Params34.batch_size)

In [11]:
hw2_dm.setup('fit')
train_dataloader = hw2_dm.train_dataloader()

In [12]:
model = StudentModel34(language="EN", params=HW2Params34("gpu"), eval_type="34")

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [13]:
# the PyTorch Lightning Trainer
trainer = pl.Trainer(max_epochs=HW2Params34.epochs,  # maximum number of epochs.
                     gpus=HW2Params34.gpus,  # the number of gpus we have at our disposal.
                     callbacks=[early_stopping, check_point_callback],  # the callback we want our trainer to use.
                     logger=wandb_logger,
                     )

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [14]:
trainer.fit(model=model, datamodule=hw2_dm)

  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type                 | Params
-------------------------------------------------------
0 | word_embedder | TransformersEmbedder | 278 M 
1 | lstm          | LSTM                 | 1.7 M 
2 | classificator | Sequential           | 36.6 K
3 | loss_fn       | CrossEntropyLoss     | 0     
-------------------------------------------------------
279 M     Trainable params
0         Non-trainable params
279 M     Total params
1,119.145 Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved. New best score: 0.000
Epoch 0, global step 786: 'val_f1_4' reached 0.00000 (best 0.00000), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=0-val_f1_4=0.0000.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.258 >= min_delta = 0.0. New best score: 0.258
Epoch 1, global step 1572: 'val_f1_4' reached 0.25809 (best 0.25809), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=1-val_f1_4=0.2581.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.160 >= min_delta = 0.0. New best score: 0.418
Epoch 2, global step 2358: 'val_f1_4' reached 0.41843 (best 0.41843), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=2-val_f1_4=0.4184.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.079 >= min_delta = 0.0. New best score: 0.498
Epoch 3, global step 3144: 'val_f1_4' reached 0.49779 (best 0.49779), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=3-val_f1_4=0.4978.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.074 >= min_delta = 0.0. New best score: 0.572
Epoch 4, global step 3930: 'val_f1_4' reached 0.57198 (best 0.57198), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=4-val_f1_4=0.5720.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.091 >= min_delta = 0.0. New best score: 0.663
Epoch 5, global step 4716: 'val_f1_4' reached 0.66304 (best 0.66304), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=5-val_f1_4=0.6630.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.045 >= min_delta = 0.0. New best score: 0.708
Epoch 6, global step 5502: 'val_f1_4' reached 0.70772 (best 0.70772), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=6-val_f1_4=0.7077.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.034 >= min_delta = 0.0. New best score: 0.742
Epoch 7, global step 6288: 'val_f1_4' reached 0.74202 (best 0.74202), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=7-val_f1_4=0.7420.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.022 >= min_delta = 0.0. New best score: 0.764
Epoch 8, global step 7074: 'val_f1_4' reached 0.76382 (best 0.76382), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=8-val_f1_4=0.7638.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.012 >= min_delta = 0.0. New best score: 0.776
Epoch 9, global step 7860: 'val_f1_4' reached 0.77586 (best 0.77586), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=9-val_f1_4=0.7759.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.011 >= min_delta = 0.0. New best score: 0.787
Epoch 10, global step 8646: 'val_f1_4' reached 0.78694 (best 0.78694), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=10-val_f1_4=0.7869.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.004 >= min_delta = 0.0. New best score: 0.791
Epoch 11, global step 9432: 'val_f1_4' reached 0.79131 (best 0.79131), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=11-val_f1_4=0.7913.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.014 >= min_delta = 0.0. New best score: 0.805
Epoch 12, global step 10218: 'val_f1_4' reached 0.80510 (best 0.80510), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=12-val_f1_4=0.8051.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Epoch 13, global step 11004: 'val_f1_4' reached 0.80147 (best 0.80510), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=13-val_f1_4=0.8015.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.005 >= min_delta = 0.0. New best score: 0.810
Epoch 14, global step 11790: 'val_f1_4' reached 0.81013 (best 0.81013), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=14-val_f1_4=0.8101.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.001 >= min_delta = 0.0. New best score: 0.811
Epoch 15, global step 12576: 'val_f1_4' reached 0.81071 (best 0.81071), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=15-val_f1_4=0.8107.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.001 >= min_delta = 0.0. New best score: 0.812
Epoch 16, global step 13362: 'val_f1_4' reached 0.81160 (best 0.81160), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=16-val_f1_4=0.8116.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.003 >= min_delta = 0.0. New best score: 0.815
Epoch 17, global step 14148: 'val_f1_4' reached 0.81465 (best 0.81465), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=17-val_f1_4=0.8147.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.004 >= min_delta = 0.0. New best score: 0.819
Epoch 18, global step 14934: 'val_f1_4' reached 0.81881 (best 0.81881), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=18-val_f1_4=0.8188.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.002 >= min_delta = 0.0. New best score: 0.821
Epoch 19, global step 15720: 'val_f1_4' reached 0.82111 (best 0.82111), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=19-val_f1_4=0.8211.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Epoch 20, global step 16506: 'val_f1_4' reached 0.82001 (best 0.82111), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=20-val_f1_4=0.8200.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Metric val_f1_4 improved by 0.000 >= min_delta = 0.0. New best score: 0.821
Epoch 21, global step 17292: 'val_f1_4' reached 0.82118 (best 0.82118), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=21-val_f1_4=0.8212.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Epoch 22, global step 18078: 'val_f1_4' was not in top 3


Validation: 0it [00:00, ?it/s]

Epoch 23, global step 18864: 'val_f1_4' reached 0.82049 (best 0.82118), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=23-val_f1_4=0.8205.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Monitored metric val_f1_4 did not improve in the last 3 records. Best score: 0.821. Signaling Trainer to stop.
Epoch 24, global step 19650: 'val_f1_4' reached 0.82110 (best 0.82118), saving model to '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=24-val_f1_4=0.8211.ckpt' as top 3


In [15]:
param = HW2Params34("gpu")
wandb_logger.experiment.config.update(param.gethyperparameterdict())


In [17]:
load = True
if load:
    savemodel = model.load_from_checkpoint(
        '/home/orlando/PycharmProjects/nlp2022-homeworks/nlp2022-hw2/experiments/hw2models/frosty-morning-68-epoch=21-val_f1_4=0.8212.ckpt',
        language="EN", params=HW2Params34("gpu"), eval_type="34")

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [18]:
torch.save(model.state_dict(), os.path.join(mainpath, "model", wandb_logger.experiment.name + "_highest_ckpt.bkp"))