In [1]:

import torch
import gc
torch.cuda.empty_cache()
gc.collect()


0

In [2]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Initialize the console

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
import torchmetrics

install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

#dataset = load_from_disk(f"../{config.Config.DATASETS_SAVE_PATH}/datasets")


['/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages', '/tmp/tmp6fe7evft', '/home/guest/Desktop/projects/third-experiments/SDA_experiments/modules']


2024-09-28 10:21:49.933274: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-28 10:21:49.964025: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




./text-files/
./hp-model-


In [3]:
import torch
import os
import pytorch_lightning as pl
from transformers import  AutoConfig, DataCollatorForLanguageModeling
from collections import defaultdict
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import torchmetrics

class JointDomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams,source_dataset_length,target_dataset_length):
        super(JointDomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)

        # Load config with hidden states output
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)

        # Set reduction factor and leave_out layers
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        self.leave_out = self.hparams.get("leave_out", [])
        # if self.leave_out != "None":
        #     self.leave_out = self.leave_out.split(",")
        #     self.leave_out = [int(i) for i in self.leave_out]
        # else:
        #     self.leave_out = []

        # Load MLM adapter with head
        self.model.load_adapter(f"{self.hparams['saved_adapter_dir']}/{self.hparams['domain_adapter_name']}", with_head=True)

        # Add classification head for the task
        self.model.add_classification_head(f"{self.hparams['task_adapter_name']}", num_labels=self.hparams["num_classes"])

        # Set active adapters
        self.model.train_adapter(self.hparams['domain_adapter_name'])
        # Calculate alpha based on dataset lengths
        self.alpha = source_dataset_length / (source_dataset_length + target_dataset_length)

        # Initialize loss functions and metrics
        self.criterion = nn.CrossEntropyLoss()
        self.mlm_criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="weighted")
        self.f1_macro = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="macro")
        self.f1_micro = torchmetrics.F1Score(task='multiclass',num_classes=self.hparams["num_classes"], average="micro")

        self.softmax = nn.Softmax(dim=1)
        self.validation_outputs = []
        self.test_outputs = []
        # Optimizer related variables
        self.learning_rate = self.hparams.get("learning_rate", 1e-4)
        self.scheduler_factor = self.hparams.get("scheduler_factor", 0.1)
        self.scheduler_patience = self.hparams.get("scheduler_patience", 0.05)
        self.scheduler_threshold = self.hparams.get("scheduler_threshold", 0.0001)
        self.scheduler_cooldown = self.hparams.get("scheduler_cooldown", 0)
        self.scheduler_eps = self.hparams.get("scheduler_eps", 1e-8)

    def forward(self, input_ids, attention_mask=None, labels=None, task=None):
        if task == "mlm":
            self.model.active_head= self.hparams['domain_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        elif task == "classification":
            self.model.active_head= self.hparams['task_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        else:
            raise ValueError("Task must be either 'mlm' or 'classification'.")
        return outputs

    def training_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha

        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))

        metrics = {
            "train/accuracy": accuracy,
            "train/f1": f1,
            "train/taskclf_loss": task_loss,
            "train/loss": loss,
            "train/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return loss

    def validation_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha
        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        self.validation_outputs.append({
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
                })
        
        metrics = {
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return metrics
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        outputs= self.validation_outputs

        avg_loss = torch.stack([x["val/loss"] for x in outputs]).mean()
        avg_task_loss = torch.stack([x["val/taskclf_loss"] for x in outputs]).mean()
        avg_mlm_loss = torch.stack([x["val/mlm_loss"] for x in outputs]).mean()
        avg_accuracy = torch.stack([x["val/accuracy"] for x in outputs]).mean()
        avg_f1 = torch.stack([x["val/f1"] for x in outputs]).mean()
        print(f"val/accuracy: {avg_accuracy}")
        print(f"val/f1: {avg_f1}")
        print(f"val/taskclf_loss: {avg_task_loss}")
        print(f"val/loss: {avg_loss}")
        print(f"val/mlm_loss: {avg_mlm_loss}")
        metrics = {
            "val/avg_loss": avg_loss,
            "val/avg_taskclf_loss": avg_task_loss,
            "val/avg_mlm_loss": avg_mlm_loss,
            "val/avg_accuracy": avg_accuracy,
            "val/avg_f1": avg_f1,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
        self.log("val_loss", avg_loss)

    def test_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        target_labels = batch["label_target"]

        # Classification task for source data
        cls_outputs_source = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits_source = cls_outputs_source.logits
        task_loss_source = self.criterion(cls_logits_source, source_labels)

        # Classification task for target data
        cls_outputs_target = self(input_ids=target_input_ids, attention_mask=target_attention_mask, task="classification")
        cls_logits_target = cls_outputs_target.logits
        task_loss_target = self.criterion(cls_logits_target, target_labels)

        # Combine losses (though typically you would evaluate them separately)
        loss = task_loss_source + task_loss_target

        accuracy_source = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_source = self.f1(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_macro_source = self.f1_macro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_micro_source = self.f1_micro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
    
        accuracy_target = self.accuracy(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_target = self.f1(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_macro_target = self.f1_macro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_micro_target = self.f1_micro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))

        metrics = {
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        self.test_outputs.append({
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        })
        return metrics
    def on_test_epoch_start(self):
        self.test_outputs = []

    def on_test_epoch_end(self):
        outputs=  self.test_outputs

        avg_loss_source = torch.stack([x["source_test/loss"] for x in outputs]).mean()
        avg_task_loss_target = torch.stack([x["target_test/loss"] for x in outputs]).mean()
        avg_accuracy_source = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
        avg_f1_source = torch.stack([x["source_test/f1"] for x in outputs]).mean()
        avg_f1_macro_source = torch.stack([x["source_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_source = torch.stack([x["source_test/f1_micro"] for x in outputs]).mean()

        avg_accuracy_target = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
        avg_f1_target = torch.stack([x["target_test/f1"] for x in outputs]).mean()
        avg_f1_macro_target = torch.stack([x["target_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_target = torch.stack([x["target_test/f1_micro"] for x in outputs]).mean()

        metrics = {
            "source_test/loss": avg_loss_source,
            "target_test/loss": avg_task_loss_target,
            "source_test/accuracy": avg_accuracy_source,
            "source_test/f1": avg_f1_source,
            "source_test/f1_macro": avg_f1_macro_source,
            "source_test/f1_micro": avg_f1_micro_source,
            "target_test/accuracy": avg_accuracy_target,
            "target_test/f1": avg_f1_target,
            "target_test/f1_macro": avg_f1_macro_target,
            "target_test/f1_micro": avg_f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"])
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, cooldown=0, min_lr=1e-8),
            'monitor': 'val_loss'
        }
        return [optimizer], [lr_scheduler]


In [4]:
import wandb

wandb.login()
# Wandb setup and training loop
seeds = [10, 100, 1000]  # List of seeds
project_name = 'SDA_mixed_edited'  # Replace with your wandb project name
domain = 'AMR'  # Replace with the specific domain for this notebook
type = 'union'  # Replace with the specific type for this notebook

# Initialize results dictionary
results = {
    "last_epoch": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "best_model": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "epoch_saved": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""

[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
reload(processed)
for seed in seeds:
    #wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "apparel_MR",
            "dataset_cache_dir": "./../../datasets",
            "pretrained_model_name": "bert-base-uncased",
            "padding": True,
            "source_domain": "apparel",
            "target_domain": "MR",
            "domain_adapter_name": "mlm_union_mr",
            "task_adapter_name": "AMRUni",
            "padding": "max_length",
            "max_seq_length": 512,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 6  # Save model at the 3rd epoch
        
        dm = processed.DataModuleSourceTargetMixed(hparams)
        dm.setup('fit')
        dm.setup("test")
        source_length, target_length = dm.get_dataset_lengths()
        print(f"Source dataset length: {source_length}")
        print(f"Target dataset length: {target_length}")
        model = JointDomainTaskAdapter(hparams,source_length,target_length)

        checkpoint_callback = ModelCheckpoint(
            filename="task-AMRUni-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
            filename="AMRUni-{epoch:02d}",
            every_n_epochs=save_epoch_3,
            save_top_k=-1,
        )

        #wandb_logger = WandbLogger()

    except Exception as e:
        print(f"Error during preprocessing : {e}")

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=10,
            accelerator="auto",
            #precision=16,
            
            default_root_dir="checkpoints",
            #logger=wandb_logger,
            callbacks=[checkpoint_callback, save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
        )

        trainer.fit(model, train_loader, val_loader)
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")

        best_model = JointDomainTaskAdapter.load_from_checkpoint(best_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = JointDomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)

    except Exception as e:
        print(f"Error during testing: {e}")

    #wandb.finish()

Seed set to 10




Source dataset length: 1440
Target dataset length: 1440




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


Missing logger folder: checkpoints/lightning_logs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.5 M     Trainable params
109 M     Non-trainable params
119 M     Total params
476.013   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.515625
val/f1: 0.6739379167556763
val/taskclf_loss: 1.0976126194000244
val/loss: 1.6285426616668701
val/mlm_loss: 2.159472703933716


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.90625
val/f1: 0.9070666432380676
val/taskclf_loss: 0.249190092086792
val/loss: 1.3821675777435303
val/mlm_loss: 2.5151448249816895


Validation: |                                                                                                 …

val/accuracy: 0.925000011920929
val/f1: 0.9257183074951172
val/taskclf_loss: 0.18641506135463715
val/loss: 1.1428638696670532
val/mlm_loss: 2.0993125438690186


Validation: |                                                                                                 …

val/accuracy: 0.9437500238418579
val/f1: 0.9438053965568542
val/taskclf_loss: 0.15121851861476898
val/loss: 0.9933668375015259
val/mlm_loss: 1.835515022277832


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9313066601753235
val/taskclf_loss: 0.16572938859462738
val/loss: 1.191404104232788
val/mlm_loss: 2.217078924179077


Validation: |                                                                                                 …

val/accuracy: 0.925000011920929
val/f1: 0.9251605272293091
val/taskclf_loss: 0.1464419662952423
val/loss: 1.1440433263778687
val/mlm_loss: 2.1416451930999756


Validation: |                                                                                                 …

val/accuracy: 0.918749988079071
val/f1: 0.9190029501914978
val/taskclf_loss: 0.13953199982643127
val/loss: 1.1332834959030151
val/mlm_loss: 2.127034902572632


Validation: |                                                                                                 …

val/accuracy: 0.925000011920929
val/f1: 0.9251605272293091
val/taskclf_loss: 0.14190040528774261
val/loss: 1.1974018812179565
val/mlm_loss: 2.252903461456299


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9313423037528992
val/taskclf_loss: 0.14308932423591614
val/loss: 1.187935709953308
val/mlm_loss: 2.2327821254730225


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9312314987182617
val/taskclf_loss: 0.14734935760498047
val/loss: 1.1181671619415283
val/mlm_loss: 2.088984727859497


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.9312500357627869
val/f1: 0.9312314987182617
val/taskclf_loss: 0.14710533618927002
val/loss: 1.2543443441390991
val/mlm_loss: 2.3615834712982178


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_0/checkpoints/task-AMRUni-epoch=02-val_loss=0.99.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_0/checkpoints/AMRUni-epoch=05.ckpt


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.29019635915756226, 'source_test/accuracy': 0.915865421295166, 'source_test/f1': 0.9153589606285095, 'source_test/f1_macro': 0.9131931662559509, 'source_test/f1_micro': 0.915865421295166, 'target_test/loss': 0.569572389125824, 'target_test/accuracy': 0.75, 'target_test/f1': 0.7499204874038696, 'target_test/f1_macro': 0.7458463907241821, 'target_test/f1_micro': 0.75}]
Best checkpoint path: checkpoints/lightning_logs/version_0/checkpoints/task-AMRUni-epoch=02-val_loss=0.99.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_0/checkpoints/AMRUni-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.2512231767177582, 'source_test/accuracy': 0.9062500596046448, 'source_test/f1': 0.9057556390762329, 'source_test/f1_macro': 0.9032345414161682, 'source_test/f1_micro': 0.9062500596046448, 'target_test/loss': 0.5196360945701599, 'target_test/accuracy': 0.7668269276618958, 'target_test/f1': 0.7670761346817017, 'target_test/f1_macro': 0.762349545955658, 'target_test/f1_micro': 0.7668269276618958}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 100


Test Results on saved epoch: [{'source_test/loss': 0.2739044725894928, 'source_test/accuracy': 0.9086538553237915, 'source_test/f1': 0.9081314206123352, 'source_test/f1_macro': 0.9058120250701904, 'source_test/f1_micro': 0.9086538553237915, 'target_test/loss': 0.5417183637619019, 'target_test/accuracy': 0.7572115659713745, 'target_test/f1': 0.7569241523742676, 'target_test/f1_macro': 0.7532508373260498, 'target_test/f1_micro': 0.7572115659713745}]


Source dataset length: 1440
Target dataset length: 1440




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.5 M     Trainable params
109 M     Non-trainable params
119 M     Total params
476.013   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.453125
val/f1: 0.6013889312744141
val/taskclf_loss: 1.0885496139526367
val/loss: 2.1034560203552246
val/mlm_loss: 3.1183624267578125


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.8812500238418579
val/f1: 0.8833485841751099
val/taskclf_loss: 0.26353690028190613
val/loss: 1.2584413290023804
val/mlm_loss: 2.2533459663391113


Validation: |                                                                                                 …

val/accuracy: 0.9125000238418579
val/f1: 0.9139848947525024
val/taskclf_loss: 0.1921301633119583
val/loss: 1.0290855169296265
val/mlm_loss: 1.866040825843811


Validation: |                                                                                                 …

val/accuracy: 0.90625
val/f1: 0.9085492491722107
val/taskclf_loss: 0.20525531470775604
val/loss: 1.2955909967422485
val/mlm_loss: 2.3859267234802246


Validation: |                                                                                                 …

val/accuracy: 0.90625
val/f1: 0.9069108366966248
val/taskclf_loss: 0.18330824375152588
val/loss: 1.2304434776306152
val/mlm_loss: 2.277578830718994


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9313423037528992
val/taskclf_loss: 0.13687540590763092
val/loss: 1.1102650165557861
val/mlm_loss: 2.0836546421051025


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9313423037528992
val/taskclf_loss: 0.13485665619373322
val/loss: 1.1840280294418335
val/mlm_loss: 2.2331998348236084


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9313423037528992
val/taskclf_loss: 0.13573339581489563
val/loss: 1.2017821073532104
val/mlm_loss: 2.2678310871124268


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9313423037528992
val/taskclf_loss: 0.1355530470609665
val/loss: 1.1509274244308472
val/mlm_loss: 2.166301727294922


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9313423037528992
val/taskclf_loss: 0.13553424179553986
val/loss: 1.1274880170822144
val/mlm_loss: 2.1194419860839844


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.9312500357627869
val/f1: 0.9313423037528992
val/taskclf_loss: 0.13550281524658203
val/loss: 1.1123733520507812
val/mlm_loss: 2.0892441272735596


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_1/checkpoints/task-AMRUni-epoch=01-val_loss=1.03.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_1/checkpoints/AMRUni-epoch=05.ckpt


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.27371060848236084, 'source_test/accuracy': 0.911057710647583, 'source_test/f1': 0.9106805324554443, 'source_test/f1_macro': 0.9081782698631287, 'source_test/f1_micro': 0.911057710647583, 'target_test/loss': 0.5498026013374329, 'target_test/accuracy': 0.7668269276618958, 'target_test/f1': 0.7670489549636841, 'target_test/f1_macro': 0.7618350386619568, 'target_test/f1_micro': 0.7668269276618958}]
Best checkpoint path: checkpoints/lightning_logs/version_1/checkpoints/task-AMRUni-epoch=01-val_loss=1.03.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_1/checkpoints/AMRUni-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.24963071942329407, 'source_test/accuracy': 0.9062500596046448, 'source_test/f1': 0.9060973525047302, 'source_test/f1_macro': 0.902510941028595, 'source_test/f1_micro': 0.9062500596046448, 'target_test/loss': 0.523516058921814, 'target_test/accuracy': 0.759615421295166, 'target_test/f1': 0.760340690612793, 'target_test/f1_macro': 0.7536036968231201, 'target_test/f1_micro': 0.759615421295166}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 1000


Test Results on saved epoch: [{'source_test/loss': 0.2618352472782135, 'source_test/accuracy': 0.9182692766189575, 'source_test/f1': 0.9179010987281799, 'source_test/f1_macro': 0.9154117703437805, 'source_test/f1_micro': 0.9182692766189575, 'target_test/loss': 0.5409373641014099, 'target_test/accuracy': 0.764423131942749, 'target_test/f1': 0.7648549675941467, 'target_test/f1_macro': 0.759293794631958, 'target_test/f1_micro': 0.764423131942749}]


Source dataset length: 1440
Target dataset length: 1440




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.5 M     Trainable params
109 M     Non-trainable params
119 M     Total params
476.013   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.03125
val/f1: 0.0068824402987957
val/taskclf_loss: 1.1625988483428955
val/loss: 1.8083505630493164
val/mlm_loss: 2.4541025161743164


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.9000000357627869
val/f1: 0.9006546139717102
val/taskclf_loss: 0.23422060906887054
val/loss: 1.4138230085372925
val/mlm_loss: 2.5934250354766846


Validation: |                                                                                                 …

val/accuracy: 0.918749988079071
val/f1: 0.9197203516960144
val/taskclf_loss: 0.19985629618167877
val/loss: 1.2828336954116821
val/mlm_loss: 2.3658111095428467


Validation: |                                                                                                 …

val/accuracy: 0.925000011920929
val/f1: 0.9252986907958984
val/taskclf_loss: 0.18344064056873322
val/loss: 1.1954678297042847
val/mlm_loss: 2.2074952125549316


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9313066601753235
val/taskclf_loss: 0.1755632907152176
val/loss: 1.276828408241272
val/mlm_loss: 2.3780934810638428


Validation: |                                                                                                 …

val/accuracy: 0.9375
val/f1: 0.9376983642578125
val/taskclf_loss: 0.14912977814674377
val/loss: 1.1498253345489502
val/mlm_loss: 2.1505208015441895


Validation: |                                                                                                 …

val/accuracy: 0.9437500238418579
val/f1: 0.9440422058105469
val/taskclf_loss: 0.1635175198316574
val/loss: 1.0408399105072021
val/mlm_loss: 1.918162226676941


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9314289093017578
val/taskclf_loss: 0.1790715456008911
val/loss: 1.123922348022461
val/mlm_loss: 2.0687732696533203


Validation: |                                                                                                 …

val/accuracy: 0.925000011920929
val/f1: 0.9255749583244324
val/taskclf_loss: 0.20792463421821594
val/loss: 1.2650508880615234
val/mlm_loss: 2.3221771717071533


Validation: |                                                                                                 …

val/accuracy: 0.925000011920929
val/f1: 0.9250866174697876
val/taskclf_loss: 0.17728640139102936
val/loss: 1.078300952911377
val/mlm_loss: 1.9793156385421753


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.9312500357627869
val/f1: 0.9316151738166809
val/taskclf_loss: 0.17458026111125946
val/loss: 1.1565628051757812
val/mlm_loss: 2.138545513153076


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_2/checkpoints/task-AMRUni-epoch=05-val_loss=1.04.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_2/checkpoints/AMRUni-epoch=05.ckpt


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.3246852457523346, 'source_test/accuracy': 0.9134615659713745, 'source_test/f1': 0.9130885601043701, 'source_test/f1_macro': 0.9104059338569641, 'source_test/f1_micro': 0.9134615659713745, 'target_test/loss': 0.6298784613609314, 'target_test/accuracy': 0.7716346383094788, 'target_test/f1': 0.7717576026916504, 'target_test/f1_macro': 0.7668190002441406, 'target_test/f1_micro': 0.7716346383094788}]
Best checkpoint path: checkpoints/lightning_logs/version_2/checkpoints/task-AMRUni-epoch=05-val_loss=1.04.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_2/checkpoints/AMRUni-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.2781248986721039, 'source_test/accuracy': 0.915865421295166, 'source_test/f1': 0.9154711961746216, 'source_test/f1_macro': 0.912734866142273, 'source_test/f1_micro': 0.915865421295166, 'target_test/loss': 0.5521131753921509, 'target_test/accuracy': 0.7788462042808533, 'target_test/f1': 0.7800495624542236, 'target_test/f1_macro': 0.7727762460708618, 'target_test/f1_micro': 0.7788462042808533}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on saved epoch: [{'source_test/loss': 0.2781248986721039, 'source_test/accuracy': 0.915865421295166, 'source_test/f1': 0.9154711961746216, 'source_test/f1_macro': 0.912734866142273, 'source_test/f1_micro': 0.915865421295166, 'target_test/loss': 0.5521131753921509, 'target_test/accuracy': 0.7788462042808533, 'target_test/f1': 0.7800495624542236, 'target_test/f1_macro': 0.7727762460708618, 'target_test/f1_micro': 0.7788462042808533}]


In [6]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.29019635915756226, 0.27371060848236084, 0.3246852457523346], 'source_test/accuracy': [0.915865421295166, 0.911057710647583, 0.9134615659713745], 'source_test/f1': [0.9153589606285095, 0.9106805324554443, 0.9130885601043701], 'source_test/f1_macro': [0.9131931662559509, 0.9081782698631287, 0.9104059338569641], 'source_test/f1_micro': [0.915865421295166, 0.911057710647583, 0.9134615659713745], 'target_test/loss': [0.569572389125824, 0.5498026013374329, 0.6298784613609314], 'target_test/accuracy': [0.75, 0.7668269276618958, 0.7716346383094788], 'target_test/f1': [0.7499204874038696, 0.7670489549636841, 0.7717576026916504], 'target_test/f1_macro': [0.7458463907241821, 0.7618350386619568, 0.7668190002441406], 'target_test/f1_micro': [0.75, 0.7668269276618958, 0.7716346383094788]}), ('best_model', {'source_test/loss': [0.2512231767177582, 0.24963071942329407, 0.2781248986721039], 'source_test/accuracy': [0.9062500596046448, 0.906250059604644

In [7]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
# wandb.init(project=project_name, name=f'{domain}_mean_results')
# for scenario in mean_results:
#     for key, value in mean_results[scenario].items():
#         wandb.log({f"{scenario}/{key}": value})
#         wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
# wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

Mean Results: {'last_epoch': {'source_test/loss': 0.2961974044640859, 'source_test/accuracy': 0.9134615659713745, 'source_test/f1': 0.913042684396108, 'source_test/f1_macro': 0.9105924566586813, 'source_test/f1_micro': 0.9134615659713745, 'target_test/loss': 0.5830844839413961, 'target_test/accuracy': 0.7628205219904581, 'target_test/f1': 0.7629090150197347, 'target_test/f1_macro': 0.7581668098767599, 'target_test/f1_micro': 0.7628205219904581}, 'best_model': {'source_test/loss': 0.25965959827105206, 'source_test/accuracy': 0.9094551801681519, 'source_test/f1': 0.9091080625851949, 'source_test/f1_macro': 0.9061601161956787, 'source_test/f1_micro': 0.9094551801681519, 'target_test/loss': 0.5317551096280416, 'target_test/accuracy': 0.7684295177459717, 'target_test/f1': 0.7691554625829061, 'target_test/f1_macro': 0.7629098296165466, 'target_test/f1_micro': 0.7684295177459717}, 'epoch_saved': {'source_test/loss': 0.2712882061799367, 'source_test/accuracy': 0.914262851079305, 'source_test/f

In [8]:
print('dones')

dones


In [9]:
best_val_loss

inf