In [1]:

import torch
import gc
torch.cuda.empty_cache()
gc.collect()


0

In [2]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Initialize the console

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
import torchmetrics

install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

#dataset = load_from_disk(f"../{config.Config.DATASETS_SAVE_PATH}/datasets")


['/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages', '/tmp/tmp0xn5kimd', '/home/guest/Desktop/projects/third-experiments/SDA_experiments/modules']


2024-09-29 20:23:15.306789: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-29 20:23:15.338175: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




./text-files/
./hp-model-


In [3]:
import torch
import os
import pytorch_lightning as pl
from transformers import  AutoConfig, DataCollatorForLanguageModeling
from collections import defaultdict
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import torchmetrics

class JointDomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams,source_dataset_length,target_dataset_length):
        super(JointDomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)

        # Load config with hidden states output
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)

        # Set reduction factor and leave_out layers
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        self.leave_out = self.hparams.get("leave_out", [])
        # if self.leave_out != "None":
        #     self.leave_out = self.leave_out.split(",")
        #     self.leave_out = [int(i) for i in self.leave_out]
        # else:
        #     self.leave_out = []

        # Load MLM adapter with head
        self.model.load_adapter(f"{self.hparams['saved_adapter_dir']}/{self.hparams['domain_adapter_name']}", with_head=True)

        # Add classification head for the task
        self.model.add_classification_head(f"{self.hparams['task_adapter_name']}", num_labels=self.hparams["num_classes"])

        # Set active adapters
        self.model.train_adapter(self.hparams['domain_adapter_name'])
        # Calculate alpha based on dataset lengths
        self.alpha = source_dataset_length / (source_dataset_length + target_dataset_length)

        # Initialize loss functions and metrics
        self.criterion = nn.CrossEntropyLoss()
        self.mlm_criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="weighted")
        self.f1_macro = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="macro")
        self.f1_micro = torchmetrics.F1Score(task='multiclass',num_classes=self.hparams["num_classes"], average="micro")

        self.softmax = nn.Softmax(dim=1)
        self.validation_outputs = []
        self.test_outputs = []
        # Optimizer related variables
        self.learning_rate = self.hparams.get("learning_rate", 1e-4)
        self.scheduler_factor = self.hparams.get("scheduler_factor", 0.1)
        self.scheduler_patience = self.hparams.get("scheduler_patience", 0.05)
        self.scheduler_threshold = self.hparams.get("scheduler_threshold", 0.0001)
        self.scheduler_cooldown = self.hparams.get("scheduler_cooldown", 0)
        self.scheduler_eps = self.hparams.get("scheduler_eps", 1e-8)

    def forward(self, input_ids, attention_mask=None, labels=None, task=None):
        if task == "mlm":
            self.model.active_head= self.hparams['domain_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        elif task == "classification":
            self.model.active_head= self.hparams['task_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        else:
            raise ValueError("Task must be either 'mlm' or 'classification'.")
        return outputs

    def training_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha

        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))

        metrics = {
            "train/accuracy": accuracy,
            "train/f1": f1,
            "train/taskclf_loss": task_loss,
            "train/loss": loss,
            "train/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return loss

    def validation_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha
        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        self.validation_outputs.append({
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
                })
        
        metrics = {
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return metrics
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        outputs= self.validation_outputs

        avg_loss = torch.stack([x["val/loss"] for x in outputs]).mean()
        avg_task_loss = torch.stack([x["val/taskclf_loss"] for x in outputs]).mean()
        avg_mlm_loss = torch.stack([x["val/mlm_loss"] for x in outputs]).mean()
        avg_accuracy = torch.stack([x["val/accuracy"] for x in outputs]).mean()
        avg_f1 = torch.stack([x["val/f1"] for x in outputs]).mean()
        print(f"val/accuracy: {avg_accuracy}")
        print(f"val/f1: {avg_f1}")
        print(f"val/taskclf_loss: {avg_task_loss}")
        print(f"val/loss: {avg_loss}")
        print(f"val/mlm_loss: {avg_mlm_loss}")
        metrics = {
            "val/avg_loss": avg_loss,
            "val/avg_taskclf_loss": avg_task_loss,
            "val/avg_mlm_loss": avg_mlm_loss,
            "val/avg_accuracy": avg_accuracy,
            "val/avg_f1": avg_f1,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
        self.log("val_loss", avg_loss)

    def test_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        target_labels = batch["label_target"]

        # Classification task for source data
        cls_outputs_source = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits_source = cls_outputs_source.logits
        task_loss_source = self.criterion(cls_logits_source, source_labels)

        # Classification task for target data
        cls_outputs_target = self(input_ids=target_input_ids, attention_mask=target_attention_mask, task="classification")
        cls_logits_target = cls_outputs_target.logits
        task_loss_target = self.criterion(cls_logits_target, target_labels)

        # Combine losses (though typically you would evaluate them separately)
        loss = task_loss_source + task_loss_target

        accuracy_source = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_source = self.f1(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_macro_source = self.f1_macro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_micro_source = self.f1_micro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
    
        accuracy_target = self.accuracy(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_target = self.f1(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_macro_target = self.f1_macro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_micro_target = self.f1_micro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))

        metrics = {
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        self.test_outputs.append({
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        })
        return metrics
    def on_test_epoch_start(self):
        self.test_outputs = []

    def on_test_epoch_end(self):
        outputs=  self.test_outputs

        avg_loss_source = torch.stack([x["source_test/loss"] for x in outputs]).mean()
        avg_task_loss_target = torch.stack([x["target_test/loss"] for x in outputs]).mean()
        avg_accuracy_source = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
        avg_f1_source = torch.stack([x["source_test/f1"] for x in outputs]).mean()
        avg_f1_macro_source = torch.stack([x["source_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_source = torch.stack([x["source_test/f1_micro"] for x in outputs]).mean()

        avg_accuracy_target = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
        avg_f1_target = torch.stack([x["target_test/f1"] for x in outputs]).mean()
        avg_f1_macro_target = torch.stack([x["target_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_target = torch.stack([x["target_test/f1_micro"] for x in outputs]).mean()

        metrics = {
            "source_test/loss": avg_loss_source,
            "target_test/loss": avg_task_loss_target,
            "source_test/accuracy": avg_accuracy_source,
            "source_test/f1": avg_f1_source,
            "source_test/f1_macro": avg_f1_macro_source,
            "source_test/f1_micro": avg_f1_micro_source,
            "target_test/accuracy": avg_accuracy_target,
            "target_test/f1": avg_f1_target,
            "target_test/f1_macro": avg_f1_macro_target,
            "target_test/f1_micro": avg_f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"])
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, cooldown=0, min_lr=1e-8),
            'monitor': 'val_loss'
        }
        return [optimizer], [lr_scheduler]


In [4]:
import wandb

wandb.login()
# Wandb setup and training loop
seeds = [10, 100, 1000]  # List of seeds
project_name = 'SDA_mixed_edited'  # Replace with your wandb project name
domain = 'BOA'  # Replace with the specific domain for this notebook
type = 'unipelt'  # Replace with the specific type for this notebook

# Initialize results dictionary
results = {
    "last_epoch": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "best_model": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "epoch_saved": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""

[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
reload(processed)
for seed in seeds:
    #wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "books_apparel",
            "dataset_cache_dir": "./../../datasets",
            "pretrained_model_name": "bert-base-uncased",
            "padding": True,
            "source_domain": "books",
            "target_domain": "apparel",
            "domain_adapter_name": "mlm_unipelt_apparel",
            "task_adapter_name": "BOAPelt",
            "padding": "max_length",
            "max_seq_length": 512,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 6  # Save model at the 3rd epoch
        
        dm = processed.DataModuleSourceTargetMixed(hparams)
        dm.setup('fit')
        dm.setup("test")
        source_length, target_length = dm.get_dataset_lengths()
        print(f"Source dataset length: {source_length}")
        print(f"Target dataset length: {target_length}")
        model = JointDomainTaskAdapter(hparams,source_length,target_length)

        checkpoint_callback = ModelCheckpoint(
            filename="task-BOAPelt-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
            filename="BOAPelt-{epoch:02d}",
            every_n_epochs=save_epoch_3,
            save_top_k=-1,
        )

        #wandb_logger = WandbLogger()

    except Exception as e:
        print(f"Error during preprocessing : {e}")

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=10,
            accelerator="auto",
            #precision=16,
            
            default_root_dir="checkpoints",
            #logger=wandb_logger,
            callbacks=[checkpoint_callback, save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
        )

        trainer.fit(model, train_loader, val_loader)
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")

        best_model = JointDomainTaskAdapter.load_from_checkpoint(best_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = JointDomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)

    except Exception as e:
        print(f"Error during testing: {e}")

    #wandb.finish()

Seed set to 10




Source dataset length: 1440
Target dataset length: 1440




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.15625
val/f1: 0.07815331220626831
val/taskclf_loss: 1.1182801723480225
val/loss: 1.5421907901763916
val/mlm_loss: 1.9661014080047607


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.8687500357627869
val/f1: 0.8693246841430664
val/taskclf_loss: 0.28522178530693054
val/loss: 1.2291792631149292
val/mlm_loss: 2.1731364727020264


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9313175082206726
val/taskclf_loss: 0.22431574761867523
val/loss: 1.2151975631713867
val/mlm_loss: 2.2060794830322266


Validation: |                                                                                                 …

val/accuracy: 0.9375
val/f1: 0.9376345872879028
val/taskclf_loss: 0.2005969136953354
val/loss: 1.1658625602722168
val/mlm_loss: 2.1311280727386475


Validation: |                                                                                                 …

val/accuracy: 0.893750011920929
val/f1: 0.8946318030357361
val/taskclf_loss: 0.22952763736248016
val/loss: 1.1960324048995972
val/mlm_loss: 2.162537097930908


Validation: |                                                                                                 …

val/accuracy: 0.9125000238418579
val/f1: 0.9138298034667969
val/taskclf_loss: 0.21852441132068634
val/loss: 1.1981314420700073
val/mlm_loss: 2.1777381896972656


Validation: |                                                                                                 …

val/accuracy: 0.925000011920929
val/f1: 0.9253968596458435
val/taskclf_loss: 0.20294542610645294
val/loss: 1.1584850549697876
val/mlm_loss: 2.1140246391296387


Validation: |                                                                                                 …

val/accuracy: 0.918749988079071
val/f1: 0.919227123260498
val/taskclf_loss: 0.2320626825094223
val/loss: 1.1728622913360596
val/mlm_loss: 2.113661527633667


Validation: |                                                                                                 …

val/accuracy: 0.925000011920929
val/f1: 0.9251357913017273
val/taskclf_loss: 0.22972238063812256
val/loss: 1.1996753215789795
val/mlm_loss: 2.169628143310547


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9313673973083496
val/taskclf_loss: 0.22401633858680725
val/loss: 1.1652441024780273
val/mlm_loss: 2.1064717769622803


Validation: |                                                                                                 …

val/accuracy: 0.9437500238418579
val/f1: 0.9439399838447571
val/taskclf_loss: 0.21084026992321014
val/loss: 1.1252965927124023
val/mlm_loss: 2.039752960205078


`Trainer.fit` stopped: `max_epochs=10` reached.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_21/checkpoints/task-BOAPelt-epoch=09-val_loss=1.13.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_21/checkpoints/BOAPelt-epoch=05.ckpt


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.2373732030391693, 'source_test/accuracy': 0.9086538553237915, 'source_test/f1': 0.9080637097358704, 'source_test/f1_macro': 0.9062641263008118, 'source_test/f1_micro': 0.9086538553237915, 'target_test/loss': 0.3832109272480011, 'target_test/accuracy': 0.8990384936332703, 'target_test/f1': 0.8983282446861267, 'target_test/f1_macro': 0.8960915207862854, 'target_test/f1_micro': 0.8990384936332703}]
Best checkpoint path: checkpoints/lightning_logs/version_21/checkpoints/task-BOAPelt-epoch=09-val_loss=1.13.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_21/checkpoints/BOAPelt-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.2373732030391693, 'source_test/accuracy': 0.9086538553237915, 'source_test/f1': 0.9080637097358704, 'source_test/f1_macro': 0.9062641263008118, 'source_test/f1_micro': 0.9086538553237915, 'target_test/loss': 0.3832109272480011, 'target_test/accuracy': 0.8990384936332703, 'target_test/f1': 0.8983282446861267, 'target_test/f1_macro': 0.8960915207862854, 'target_test/f1_micro': 0.8990384936332703}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 100


Test Results on saved epoch: [{'source_test/loss': 0.2334204912185669, 'source_test/accuracy': 0.9086538553237915, 'source_test/f1': 0.9079996347427368, 'source_test/f1_macro': 0.9066831469535828, 'source_test/f1_micro': 0.9086538553237915, 'target_test/loss': 0.28453710675239563, 'target_test/accuracy': 0.8942307829856873, 'target_test/f1': 0.8943992853164673, 'target_test/f1_macro': 0.8895087242126465, 'target_test/f1_micro': 0.8942307829856873}]


Source dataset length: 1440
Target dataset length: 1440




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.0
val/f1: 0.0
val/taskclf_loss: 1.1719928979873657
val/loss: 1.5562982559204102
val/mlm_loss: 1.9406037330627441


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.8812500238418579
val/f1: 0.8815540671348572
val/taskclf_loss: 0.3376900851726532
val/loss: 1.2083386182785034
val/mlm_loss: 2.0789871215820312


Validation: |                                                                                                 …

val/accuracy: 0.90625
val/f1: 0.9066364169120789
val/taskclf_loss: 0.24702425301074982
val/loss: 1.1820343732833862
val/mlm_loss: 2.117044448852539


Validation: |                                                                                                 …

val/accuracy: 0.9125000238418579
val/f1: 0.9128319025039673
val/taskclf_loss: 0.2287123203277588
val/loss: 1.1561716794967651
val/mlm_loss: 2.0836310386657715


Validation: |                                                                                                 …

val/accuracy: 0.9000000357627869
val/f1: 0.9015274047851562
val/taskclf_loss: 0.26603543758392334
val/loss: 1.1287857294082642
val/mlm_loss: 1.991536021232605


Validation: |                                                                                                 …

val/accuracy: 0.9000000357627869
val/f1: 0.9009370803833008
val/taskclf_loss: 0.27240848541259766
val/loss: 1.218684434890747
val/mlm_loss: 2.1649603843688965


Validation: |                                                                                                 …

val/accuracy: 0.918749988079071
val/f1: 0.9191163182258606
val/taskclf_loss: 0.22153626382350922
val/loss: 1.1543149948120117
val/mlm_loss: 2.0870938301086426


Validation: |                                                                                                 …

val/accuracy: 0.925000011920929
val/f1: 0.925409734249115
val/taskclf_loss: 0.2233874797821045
val/loss: 1.1321016550064087
val/mlm_loss: 2.040815830230713


Validation: |                                                                                                 …

val/accuracy: 0.9437500238418579
val/f1: 0.9438172578811646
val/taskclf_loss: 0.19311287999153137
val/loss: 1.1295034885406494
val/mlm_loss: 2.06589412689209


Validation: |                                                                                                 …

val/accuracy: 0.9437500238418579
val/f1: 0.9438172578811646
val/taskclf_loss: 0.18974058330059052
val/loss: 1.1829451322555542
val/mlm_loss: 2.176149606704712


Validation: |                                                                                                 …

val/accuracy: 0.9437500238418579
val/f1: 0.9438172578811646
val/taskclf_loss: 0.19214193522930145
val/loss: 1.118057131767273
val/mlm_loss: 2.0439720153808594


`Trainer.fit` stopped: `max_epochs=10` reached.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_22/checkpoints/task-BOAPelt-epoch=09-val_loss=1.12.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_22/checkpoints/BOAPelt-epoch=05.ckpt


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.22985659539699554, 'source_test/accuracy': 0.911057710647583, 'source_test/f1': 0.9105929136276245, 'source_test/f1_macro': 0.9087851643562317, 'source_test/f1_micro': 0.911057710647583, 'target_test/loss': 0.3688313663005829, 'target_test/accuracy': 0.8822115659713745, 'target_test/f1': 0.8823797702789307, 'target_test/f1_macro': 0.8776121735572815, 'target_test/f1_micro': 0.8822115659713745}]
Best checkpoint path: checkpoints/lightning_logs/version_22/checkpoints/task-BOAPelt-epoch=09-val_loss=1.12.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_22/checkpoints/BOAPelt-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.22985659539699554, 'source_test/accuracy': 0.911057710647583, 'source_test/f1': 0.9105929136276245, 'source_test/f1_macro': 0.9087851643562317, 'source_test/f1_micro': 0.911057710647583, 'target_test/loss': 0.3688313663005829, 'target_test/accuracy': 0.8822115659713745, 'target_test/f1': 0.8823797702789307, 'target_test/f1_macro': 0.8776121735572815, 'target_test/f1_micro': 0.8822115659713745}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 1000


Test Results on saved epoch: [{'source_test/loss': 0.23322641849517822, 'source_test/accuracy': 0.915865421295166, 'source_test/f1': 0.9154796004295349, 'source_test/f1_macro': 0.9138443470001221, 'source_test/f1_micro': 0.915865421295166, 'target_test/loss': 0.3029686510562897, 'target_test/accuracy': 0.8966346383094788, 'target_test/f1': 0.8963662981987, 'target_test/f1_macro': 0.8924261331558228, 'target_test/f1_micro': 0.8966346383094788}]


Source dataset length: 1440
Target dataset length: 1440




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.109375
val/f1: 0.04954680800437927
val/taskclf_loss: 1.1246412992477417
val/loss: 1.7441984415054321
val/mlm_loss: 2.363755464553833


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.875
val/f1: 0.8750629425048828
val/taskclf_loss: 0.28219932317733765
val/loss: 1.187234878540039
val/mlm_loss: 2.092270612716675


Validation: |                                                                                                 …

val/accuracy: 0.9125000238418579
val/f1: 0.9129217267036438
val/taskclf_loss: 0.2409908026456833
val/loss: 1.2636170387268066
val/mlm_loss: 2.286243200302124


Validation: |                                                                                                 …

val/accuracy: 0.893750011920929
val/f1: 0.8946318030357361
val/taskclf_loss: 0.2562766969203949
val/loss: 1.168352723121643
val/mlm_loss: 2.0804288387298584


Validation: |                                                                                                 …

val/accuracy: 0.90625
val/f1: 0.9071063995361328
val/taskclf_loss: 0.2430570125579834
val/loss: 1.1862623691558838
val/mlm_loss: 2.129467725753784


Validation: |                                                                                                 …

val/accuracy: 0.918749988079071
val/f1: 0.918928325176239
val/taskclf_loss: 0.23343463242053986
val/loss: 1.1592496633529663
val/mlm_loss: 2.0850648880004883


Validation: |                                                                                                 …

val/accuracy: 0.918749988079071
val/f1: 0.9190271496772766
val/taskclf_loss: 0.2162967026233673
val/loss: 1.1458523273468018
val/mlm_loss: 2.0754077434539795


Validation: |                                                                                                 …

val/accuracy: 0.9125000238418579
val/f1: 0.9128574728965759
val/taskclf_loss: 0.2264663279056549
val/loss: 1.1709283590316772
val/mlm_loss: 2.1153905391693115


Validation: |                                                                                                 …

val/accuracy: 0.9125000238418579
val/f1: 0.9131200909614563
val/taskclf_loss: 0.22828280925750732
val/loss: 1.1792739629745483
val/mlm_loss: 2.1302649974823


Validation: |                                                                                                 …

val/accuracy: 0.918749988079071
val/f1: 0.918804943561554
val/taskclf_loss: 0.24437575042247772
val/loss: 1.2110834121704102
val/mlm_loss: 2.177790880203247


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.9312500357627869
val/f1: 0.9312807321548462
val/taskclf_loss: 0.21998734772205353
val/loss: 1.1924219131469727
val/mlm_loss: 2.164856433868408


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_23/checkpoints/task-BOAPelt-epoch=05-val_loss=1.15.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_23/checkpoints/BOAPelt-epoch=05.ckpt


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.23625154793262482, 'source_test/accuracy': 0.920673131942749, 'source_test/f1': 0.9204500317573547, 'source_test/f1_macro': 0.9189558625221252, 'source_test/f1_micro': 0.920673131942749, 'target_test/loss': 0.3971921503543854, 'target_test/accuracy': 0.8870192766189575, 'target_test/f1': 0.8866490721702576, 'target_test/f1_macro': 0.8829326033592224, 'target_test/f1_micro': 0.8870192766189575}]
Best checkpoint path: checkpoints/lightning_logs/version_23/checkpoints/task-BOAPelt-epoch=05-val_loss=1.15.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_23/checkpoints/BOAPelt-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.2411457598209381, 'source_test/accuracy': 0.9086538553237915, 'source_test/f1': 0.9082756638526917, 'source_test/f1_macro': 0.9065330028533936, 'source_test/f1_micro': 0.9086538553237915, 'target_test/loss': 0.30148136615753174, 'target_test/accuracy': 0.8990384936332703, 'target_test/f1': 0.8986666202545166, 'target_test/f1_macro': 0.8950797319412231, 'target_test/f1_micro': 0.8990384936332703}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on saved epoch: [{'source_test/loss': 0.2411457598209381, 'source_test/accuracy': 0.9086538553237915, 'source_test/f1': 0.9082756638526917, 'source_test/f1_macro': 0.9065330028533936, 'source_test/f1_micro': 0.9086538553237915, 'target_test/loss': 0.30148136615753174, 'target_test/accuracy': 0.8990384936332703, 'target_test/f1': 0.8986666202545166, 'target_test/f1_macro': 0.8950797319412231, 'target_test/f1_micro': 0.8990384936332703}]


In [6]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.2373732030391693, 0.22985659539699554, 0.23625154793262482], 'source_test/accuracy': [0.9086538553237915, 0.911057710647583, 0.920673131942749], 'source_test/f1': [0.9080637097358704, 0.9105929136276245, 0.9204500317573547], 'source_test/f1_macro': [0.9062641263008118, 0.9087851643562317, 0.9189558625221252], 'source_test/f1_micro': [0.9086538553237915, 0.911057710647583, 0.920673131942749], 'target_test/loss': [0.3832109272480011, 0.3688313663005829, 0.3971921503543854], 'target_test/accuracy': [0.8990384936332703, 0.8822115659713745, 0.8870192766189575], 'target_test/f1': [0.8983282446861267, 0.8823797702789307, 0.8866490721702576], 'target_test/f1_macro': [0.8960915207862854, 0.8776121735572815, 0.8829326033592224], 'target_test/f1_micro': [0.8990384936332703, 0.8822115659713745, 0.8870192766189575]}), ('best_model', {'source_test/loss': [0.2373732030391693, 0.22985659539699554, 0.2411457598209381], 'source_test/accuracy': [0.908653

In [7]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
# wandb.init(project=project_name, name=f'{domain}_mean_results')
# for scenario in mean_results:
#     for key, value in mean_results[scenario].items():
#         wandb.log({f"{scenario}/{key}": value})
#         wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
# wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

Mean Results: {'last_epoch': {'source_test/loss': 0.2344937821229299, 'source_test/accuracy': 0.9134615659713745, 'source_test/f1': 0.9130355517069498, 'source_test/f1_macro': 0.9113350510597229, 'source_test/f1_micro': 0.9134615659713745, 'target_test/loss': 0.38307814796765643, 'target_test/accuracy': 0.8894231120745341, 'target_test/f1': 0.889119029045105, 'target_test/f1_macro': 0.8855454325675964, 'target_test/f1_micro': 0.8894231120745341}, 'best_model': {'source_test/loss': 0.236125186085701, 'source_test/accuracy': 0.909455140431722, 'source_test/f1': 0.9089774290720621, 'source_test/f1_macro': 0.9071940978368124, 'source_test/f1_micro': 0.909455140431722, 'target_test/loss': 0.3511745532353719, 'target_test/accuracy': 0.8934295177459717, 'target_test/f1': 0.8931248784065247, 'target_test/f1_macro': 0.8895944754282633, 'target_test/f1_micro': 0.8934295177459717}, 'epoch_saved': {'source_test/loss': 0.2359308898448944, 'source_test/accuracy': 0.911057710647583, 'source_test/f1':

In [8]:
print('dones')

dones


In [9]:
best_val_loss

inf