In [1]:

import torch
import gc
torch.cuda.empty_cache()
gc.collect()


0

In [2]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Initialize the console

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
import torchmetrics

install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

#dataset = load_from_disk(f"../{config.Config.DATASETS_SAVE_PATH}/datasets")


['/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages', '/tmp/tmp1exay4zw', '/home/guest/Desktop/projects/third-experiments/SDA_experiments/modules']


2024-09-29 22:08:46.195456: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-29 22:08:46.226147: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




./text-files/
./hp-model-


In [3]:
import torch
import os
import pytorch_lightning as pl
from transformers import  AutoConfig, DataCollatorForLanguageModeling
from collections import defaultdict
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import torchmetrics

class JointDomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams,source_dataset_length,target_dataset_length):
        super(JointDomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)

        # Load config with hidden states output
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)

        # Set reduction factor and leave_out layers
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        self.leave_out = self.hparams.get("leave_out", [])
        # if self.leave_out != "None":
        #     self.leave_out = self.leave_out.split(",")
        #     self.leave_out = [int(i) for i in self.leave_out]
        # else:
        #     self.leave_out = []

        # Load MLM adapter with head
        self.model.load_adapter(f"{self.hparams['saved_adapter_dir']}/{self.hparams['domain_adapter_name']}", with_head=True)

        # Add classification head for the task
        self.model.add_classification_head(f"{self.hparams['task_adapter_name']}", num_labels=self.hparams["num_classes"])

        # Set active adapters
        self.model.train_adapter(self.hparams['domain_adapter_name'])
        # Calculate alpha based on dataset lengths
        self.alpha = source_dataset_length / (source_dataset_length + target_dataset_length)

        # Initialize loss functions and metrics
        self.criterion = nn.CrossEntropyLoss()
        self.mlm_criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="weighted")
        self.f1_macro = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="macro")
        self.f1_micro = torchmetrics.F1Score(task='multiclass',num_classes=self.hparams["num_classes"], average="micro")

        self.softmax = nn.Softmax(dim=1)
        self.validation_outputs = []
        self.test_outputs = []
        # Optimizer related variables
        self.learning_rate = self.hparams.get("learning_rate", 1e-4)
        self.scheduler_factor = self.hparams.get("scheduler_factor", 0.1)
        self.scheduler_patience = self.hparams.get("scheduler_patience", 0.05)
        self.scheduler_threshold = self.hparams.get("scheduler_threshold", 0.0001)
        self.scheduler_cooldown = self.hparams.get("scheduler_cooldown", 0)
        self.scheduler_eps = self.hparams.get("scheduler_eps", 1e-8)

    def forward(self, input_ids, attention_mask=None, labels=None, task=None):
        if task == "mlm":
            self.model.active_head= self.hparams['domain_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        elif task == "classification":
            self.model.active_head= self.hparams['task_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        else:
            raise ValueError("Task must be either 'mlm' or 'classification'.")
        return outputs

    def training_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha

        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))

        metrics = {
            "train/accuracy": accuracy,
            "train/f1": f1,
            "train/taskclf_loss": task_loss,
            "train/loss": loss,
            "train/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return loss

    def validation_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha
        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        self.validation_outputs.append({
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
                })
        
        metrics = {
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return metrics
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        outputs= self.validation_outputs

        avg_loss = torch.stack([x["val/loss"] for x in outputs]).mean()
        avg_task_loss = torch.stack([x["val/taskclf_loss"] for x in outputs]).mean()
        avg_mlm_loss = torch.stack([x["val/mlm_loss"] for x in outputs]).mean()
        avg_accuracy = torch.stack([x["val/accuracy"] for x in outputs]).mean()
        avg_f1 = torch.stack([x["val/f1"] for x in outputs]).mean()
        print(f"val/accuracy: {avg_accuracy}")
        print(f"val/f1: {avg_f1}")
        print(f"val/taskclf_loss: {avg_task_loss}")
        print(f"val/loss: {avg_loss}")
        print(f"val/mlm_loss: {avg_mlm_loss}")
        metrics = {
            "val/avg_loss": avg_loss,
            "val/avg_taskclf_loss": avg_task_loss,
            "val/avg_mlm_loss": avg_mlm_loss,
            "val/avg_accuracy": avg_accuracy,
            "val/avg_f1": avg_f1,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
        self.log("val_loss", avg_loss)

    def test_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        target_labels = batch["label_target"]

        # Classification task for source data
        cls_outputs_source = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits_source = cls_outputs_source.logits
        task_loss_source = self.criterion(cls_logits_source, source_labels)

        # Classification task for target data
        cls_outputs_target = self(input_ids=target_input_ids, attention_mask=target_attention_mask, task="classification")
        cls_logits_target = cls_outputs_target.logits
        task_loss_target = self.criterion(cls_logits_target, target_labels)

        # Combine losses (though typically you would evaluate them separately)
        loss = task_loss_source + task_loss_target

        accuracy_source = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_source = self.f1(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_macro_source = self.f1_macro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_micro_source = self.f1_micro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
    
        accuracy_target = self.accuracy(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_target = self.f1(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_macro_target = self.f1_macro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_micro_target = self.f1_micro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))

        metrics = {
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        self.test_outputs.append({
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        })
        return metrics
    def on_test_epoch_start(self):
        self.test_outputs = []

    def on_test_epoch_end(self):
        outputs=  self.test_outputs

        avg_loss_source = torch.stack([x["source_test/loss"] for x in outputs]).mean()
        avg_task_loss_target = torch.stack([x["target_test/loss"] for x in outputs]).mean()
        avg_accuracy_source = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
        avg_f1_source = torch.stack([x["source_test/f1"] for x in outputs]).mean()
        avg_f1_macro_source = torch.stack([x["source_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_source = torch.stack([x["source_test/f1_micro"] for x in outputs]).mean()

        avg_accuracy_target = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
        avg_f1_target = torch.stack([x["target_test/f1"] for x in outputs]).mean()
        avg_f1_macro_target = torch.stack([x["target_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_target = torch.stack([x["target_test/f1_micro"] for x in outputs]).mean()

        metrics = {
            "source_test/loss": avg_loss_source,
            "target_test/loss": avg_task_loss_target,
            "source_test/accuracy": avg_accuracy_source,
            "source_test/f1": avg_f1_source,
            "source_test/f1_macro": avg_f1_macro_source,
            "source_test/f1_micro": avg_f1_micro_source,
            "target_test/accuracy": avg_accuracy_target,
            "target_test/f1": avg_f1_target,
            "target_test/f1_macro": avg_f1_macro_target,
            "target_test/f1_micro": avg_f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"])
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, cooldown=0, min_lr=1e-8),
            'monitor': 'val_loss'
        }
        return [optimizer], [lr_scheduler]


In [4]:
import wandb

wandb.login()
# Wandb setup and training loop
seeds = [10, 100, 1000]  # List of seeds
project_name = 'SDA_mixed_edited'  # Replace with your wandb project name
domain = 'BAA'  # Replace with the specific domain for this notebook
type = 'unipelt'  # Replace with the specific type for this notebook

# Initialize results dictionary
results = {
    "last_epoch": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "best_model": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "epoch_saved": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""

[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
reload(processed)
for seed in seeds:
    #wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "baby_apparel",
            "dataset_cache_dir": "./../../datasets",
            "pretrained_model_name": "bert-base-uncased",
            "padding": True,
            "source_domain": "baby",
            "target_domain": "apparel",
            "domain_adapter_name": "mlm_unipelt_apparel",
            "task_adapter_name": "BAAPelt",
            "padding": "max_length",
            "max_seq_length": 512,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 6  # Save model at the 3rd epoch
        
        dm = processed.DataModuleSourceTargetMixed(hparams)
        dm.setup('fit')
        dm.setup("test")
        source_length, target_length = dm.get_dataset_lengths()
        print(f"Source dataset length: {source_length}")
        print(f"Target dataset length: {target_length}")
        model = JointDomainTaskAdapter(hparams,source_length,target_length)

        checkpoint_callback = ModelCheckpoint(
            filename="task-BAAPelt-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
            filename="BAAPelt-{epoch:02d}",
            every_n_epochs=save_epoch_3,
            save_top_k=-1,
        )

        #wandb_logger = WandbLogger()

    except Exception as e:
        print(f"Error during preprocessing : {e}")

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=10,
            accelerator="auto",
            #precision=16,
            
            default_root_dir="checkpoints",
            #logger=wandb_logger,
            callbacks=[checkpoint_callback, save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
        )

        trainer.fit(model, train_loader, val_loader)
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")

        best_model = JointDomainTaskAdapter.load_from_checkpoint(best_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = JointDomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)

    except Exception as e:
        print(f"Error during testing: {e}")

    #wandb.finish()

Seed set to 10




Source dataset length: 1350
Target dataset length: 1440




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.03125
val/f1: 0.00751488097012043
val/taskclf_loss: 1.1253752708435059
val/loss: 1.5592983961105347
val/mlm_loss: 1.9661014080047607


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.8687500357627869
val/f1: 0.8688713312149048
val/taskclf_loss: 0.3637843132019043
val/loss: 1.2960097789764404
val/mlm_loss: 2.169971227645874


Validation: |                                                                                                 …

val/accuracy: 0.8687500357627869
val/f1: 0.8686186671257019
val/taskclf_loss: 0.32203108072280884
val/loss: 1.2936280965805054
val/mlm_loss: 2.204500675201416


Validation: |                                                                                                 …

val/accuracy: 0.84375
val/f1: 0.8449558615684509
val/taskclf_loss: 0.3898746073246002
val/loss: 1.2916942834854126
val/mlm_loss: 2.137150287628174


Validation: |                                                                                                 …

val/accuracy: 0.8500000238418579
val/f1: 0.8503780364990234
val/taskclf_loss: 0.38303670287132263
val/loss: 1.2998627424240112
val/mlm_loss: 2.1593871116638184


Validation: |                                                                                                 …

val/accuracy: 0.8375000357627869
val/f1: 0.8378636240959167
val/taskclf_loss: 0.39923545718193054
val/loss: 1.319744348526001
val/mlm_loss: 2.1827213764190674


Validation: |                                                                                                 …

val/accuracy: 0.8500000238418579
val/f1: 0.8500574231147766
val/taskclf_loss: 0.37120187282562256
val/loss: 1.2686502933502197
val/mlm_loss: 2.1100080013275146


Validation: |                                                                                                 …

val/accuracy: 0.831250011920929
val/f1: 0.8318613171577454
val/taskclf_loss: 0.47482284903526306
val/loss: 1.3121519088745117
val/mlm_loss: 2.0971477031707764


Validation: |                                                                                                 …

val/accuracy: 0.875
val/f1: 0.8748858571052551
val/taskclf_loss: 0.41966649889945984
val/loss: 1.3224884271621704
val/mlm_loss: 2.168884038925171


Validation: |                                                                                                 …

val/accuracy: 0.8812500238418579
val/f1: 0.8814330101013184
val/taskclf_loss: 0.39422303438186646
val/loss: 1.2861897945404053
val/mlm_loss: 2.122408628463745


Validation: |                                                                                                 …

val/accuracy: 0.887499988079071
val/f1: 0.8874121904373169
val/taskclf_loss: 0.4001811146736145
val/loss: 1.25243079662323
val/mlm_loss: 2.051414966583252


`Trainer.fit` stopped: `max_epochs=10` reached.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_27/checkpoints/task-BAAPelt-epoch=09-val_loss=1.25.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_27/checkpoints/BAAPelt-epoch=05.ckpt


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.23772653937339783, 'source_test/accuracy': 0.9326923489570618, 'source_test/f1': 0.9332959651947021, 'source_test/f1_macro': 0.9265946745872498, 'source_test/f1_micro': 0.9326923489570618, 'target_test/loss': 0.31779032945632935, 'target_test/accuracy': 0.9134615659713745, 'target_test/f1': 0.9132410287857056, 'target_test/f1_macro': 0.909974217414856, 'target_test/f1_micro': 0.9134615659713745}]
Best checkpoint path: checkpoints/lightning_logs/version_27/checkpoints/task-BAAPelt-epoch=09-val_loss=1.25.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_27/checkpoints/BAAPelt-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.23772653937339783, 'source_test/accuracy': 0.9326923489570618, 'source_test/f1': 0.9332959651947021, 'source_test/f1_macro': 0.9265946745872498, 'source_test/f1_micro': 0.9326923489570618, 'target_test/loss': 0.31779032945632935, 'target_test/accuracy': 0.9134615659713745, 'target_test/f1': 0.9132410287857056, 'target_test/f1_macro': 0.909974217414856, 'target_test/f1_micro': 0.9134615659713745}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 100


Test Results on saved epoch: [{'source_test/loss': 0.20179609954357147, 'source_test/accuracy': 0.9230769872665405, 'source_test/f1': 0.9232242703437805, 'source_test/f1_macro': 0.9174383878707886, 'source_test/f1_micro': 0.9230769872665405, 'target_test/loss': 0.2870055139064789, 'target_test/accuracy': 0.915865421295166, 'target_test/f1': 0.9151583909988403, 'target_test/f1_macro': 0.9136574864387512, 'target_test/f1_micro': 0.915865421295166}]


Source dataset length: 1350
Target dataset length: 1440




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.0
val/f1: 0.0
val/taskclf_loss: 1.1450589895248413
val/loss: 1.5556626319885254
val/mlm_loss: 1.9406037330627441


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.862500011920929
val/f1: 0.8629001975059509
val/taskclf_loss: 0.4031517207622528
val/loss: 1.2686837911605835
val/mlm_loss: 2.08012056350708


Validation: |                                                                                                 …

val/accuracy: 0.8500000238418579
val/f1: 0.8501648902893066
val/taskclf_loss: 0.3348635137081146
val/loss: 1.2529255151748657
val/mlm_loss: 2.1136085987091064


Validation: |                                                                                                 …

val/accuracy: 0.8500000238418579
val/f1: 0.8501796722412109
val/taskclf_loss: 0.37280285358428955
val/loss: 1.2575855255126953
val/mlm_loss: 2.087069272994995


Validation: |                                                                                                 …

val/accuracy: 0.8375000357627869
val/f1: 0.8379772305488586
val/taskclf_loss: 0.4242214262485504
val/loss: 1.2337502241134644
val/mlm_loss: 1.9926834106445312


Validation: |                                                                                                 …

val/accuracy: 0.862500011920929
val/f1: 0.8629986047744751
val/taskclf_loss: 0.4146350026130676
val/loss: 1.315501093864441
val/mlm_loss: 2.1600632667541504


Validation: |                                                                                                 …

val/accuracy: 0.856249988079071
val/f1: 0.8565599322319031
val/taskclf_loss: 0.3794865608215332
val/loss: 1.2651208639144897
val/mlm_loss: 2.0954034328460693


Validation: |                                                                                                 …

val/accuracy: 0.856249988079071
val/f1: 0.8563753366470337
val/taskclf_loss: 0.43681249022483826
val/loss: 1.2620556354522705
val/mlm_loss: 2.0357210636138916


Validation: |                                                                                                 …

val/accuracy: 0.893750011920929
val/f1: 0.893643856048584
val/taskclf_loss: 0.37325355410575867
val/loss: 1.2420368194580078
val/mlm_loss: 2.05652117729187


Validation: |                                                                                                 …

val/accuracy: 0.893750011920929
val/f1: 0.893643856048584
val/taskclf_loss: 0.3761279284954071
val/loss: 1.3046841621398926
val/mlm_loss: 2.175205945968628


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.887499988079071
val/f1: 0.8872877955436707
val/taskclf_loss: 0.40003395080566406
val/loss: 1.2479019165039062
val/mlm_loss: 2.0427780151367188


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_28/checkpoints/task-BAAPelt-epoch=03-val_loss=1.23.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_28/checkpoints/BAAPelt-epoch=05.ckpt


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.2196682244539261, 'source_test/accuracy': 0.9326923489570618, 'source_test/f1': 0.9332832098007202, 'source_test/f1_macro': 0.9265822172164917, 'source_test/f1_micro': 0.9326923489570618, 'target_test/loss': 0.30868300795555115, 'target_test/accuracy': 0.915865421295166, 'target_test/f1': 0.9154998660087585, 'target_test/f1_macro': 0.9126836061477661, 'target_test/f1_micro': 0.915865421295166}]
Best checkpoint path: checkpoints/lightning_logs/version_28/checkpoints/task-BAAPelt-epoch=03-val_loss=1.23.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_28/checkpoints/BAAPelt-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.22842848300933838, 'source_test/accuracy': 0.915865421295166, 'source_test/f1': 0.9164719581604004, 'source_test/f1_macro': 0.908331036567688, 'source_test/f1_micro': 0.915865421295166, 'target_test/loss': 0.3195975124835968, 'target_test/accuracy': 0.8918269276618958, 'target_test/f1': 0.8919014930725098, 'target_test/f1_macro': 0.8884965777397156, 'target_test/f1_micro': 0.8918269276618958}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 1000


Test Results on saved epoch: [{'source_test/loss': 0.20614773035049438, 'source_test/accuracy': 0.9399038553237915, 'source_test/f1': 0.9399073123931885, 'source_test/f1_macro': 0.9355592131614685, 'source_test/f1_micro': 0.9399038553237915, 'target_test/loss': 0.2976929843425751, 'target_test/accuracy': 0.911057710647583, 'target_test/f1': 0.9102469086647034, 'target_test/f1_macro': 0.9088333249092102, 'target_test/f1_micro': 0.911057710647583}]


Source dataset length: 1350
Target dataset length: 1440




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.140625
val/f1: 0.10473360121250153
val/taskclf_loss: 1.1140581369400024
val/loss: 1.7590632438659668
val/mlm_loss: 2.363755464553833


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.856249988079071
val/f1: 0.8563375473022461
val/taskclf_loss: 0.3381410539150238
val/loss: 1.2428948879241943
val/mlm_loss: 2.0911014080047607


Validation: |                                                                                                 …

val/accuracy: 0.8687500357627869
val/f1: 0.8688753247261047
val/taskclf_loss: 0.31905320286750793
val/loss: 1.3356876373291016
val/mlm_loss: 2.288782835006714


Validation: |                                                                                                 …

val/accuracy: 0.831250011920929
val/f1: 0.8328256011009216
val/taskclf_loss: 0.4218873083591461
val/loss: 1.277233362197876
val/mlm_loss: 2.079120397567749


Validation: |                                                                                                 …

val/accuracy: 0.862500011920929
val/f1: 0.8628503680229187
val/taskclf_loss: 0.39799144864082336
val/loss: 1.2947940826416016
val/mlm_loss: 2.1355464458465576


Validation: |                                                                                                 …

val/accuracy: 0.887499988079071
val/f1: 0.8872877955436707
val/taskclf_loss: 0.31732064485549927
val/loss: 1.2269576787948608
val/mlm_loss: 2.079742431640625


Validation: |                                                                                                 …

val/accuracy: 0.887499988079071
val/f1: 0.8872877955436707
val/taskclf_loss: 0.3356456458568573
val/loss: 1.2295078039169312
val/mlm_loss: 2.0675036907196045


Validation: |                                                                                                 …

val/accuracy: 0.887499988079071
val/f1: 0.8872877955436707
val/taskclf_loss: 0.3259572684764862
val/loss: 1.2442830801010132
val/mlm_loss: 2.1052136421203613


Validation: |                                                                                                 …

val/accuracy: 0.887499988079071
val/f1: 0.8872877955436707
val/taskclf_loss: 0.3311302363872528
val/loss: 1.256903052330017
val/mlm_loss: 2.1248152256011963


Validation: |                                                                                                 …

val/accuracy: 0.887499988079071
val/f1: 0.8872877955436707
val/taskclf_loss: 0.32855963706970215
val/loss: 1.2832748889923096
val/mlm_loss: 2.1783204078674316


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.887499988079071
val/f1: 0.8872877955436707
val/taskclf_loss: 0.3286202549934387
val/loss: 1.2805263996124268
val/mlm_loss: 2.172938823699951


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_29/checkpoints/task-BAAPelt-epoch=04-val_loss=1.23.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_29/checkpoints/BAAPelt-epoch=05.ckpt


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.21299795806407928, 'source_test/accuracy': 0.9230769872665405, 'source_test/f1': 0.9230547547340393, 'source_test/f1_macro': 0.9175388813018799, 'source_test/f1_micro': 0.9230769872665405, 'target_test/loss': 0.28146058320999146, 'target_test/accuracy': 0.911057710647583, 'target_test/f1': 0.9106116890907288, 'target_test/f1_macro': 0.9078984260559082, 'target_test/f1_micro': 0.911057710647583}]
Best checkpoint path: checkpoints/lightning_logs/version_29/checkpoints/task-BAAPelt-epoch=04-val_loss=1.23.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_29/checkpoints/BAAPelt-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.2115146517753601, 'source_test/accuracy': 0.915865421295166, 'source_test/f1': 0.9158570766448975, 'source_test/f1_macro': 0.909648060798645, 'source_test/f1_micro': 0.915865421295166, 'target_test/loss': 0.26735129952430725, 'target_test/accuracy': 0.9086538553237915, 'target_test/f1': 0.908230721950531, 'target_test/f1_macro': 0.9053246974945068, 'target_test/f1_micro': 0.9086538553237915}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on saved epoch: [{'source_test/loss': 0.21229912340641022, 'source_test/accuracy': 0.920673131942749, 'source_test/f1': 0.9206529259681702, 'source_test/f1_macro': 0.9150308966636658, 'source_test/f1_micro': 0.920673131942749, 'target_test/loss': 0.2774580419063568, 'target_test/accuracy': 0.9086538553237915, 'target_test/f1': 0.9081428647041321, 'target_test/f1_macro': 0.9055066704750061, 'target_test/f1_micro': 0.9086538553237915}]


In [6]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.23772653937339783, 0.2196682244539261, 0.21299795806407928], 'source_test/accuracy': [0.9326923489570618, 0.9326923489570618, 0.9230769872665405], 'source_test/f1': [0.9332959651947021, 0.9332832098007202, 0.9230547547340393], 'source_test/f1_macro': [0.9265946745872498, 0.9265822172164917, 0.9175388813018799], 'source_test/f1_micro': [0.9326923489570618, 0.9326923489570618, 0.9230769872665405], 'target_test/loss': [0.31779032945632935, 0.30868300795555115, 0.28146058320999146], 'target_test/accuracy': [0.9134615659713745, 0.915865421295166, 0.911057710647583], 'target_test/f1': [0.9132410287857056, 0.9154998660087585, 0.9106116890907288], 'target_test/f1_macro': [0.909974217414856, 0.9126836061477661, 0.9078984260559082], 'target_test/f1_micro': [0.9134615659713745, 0.915865421295166, 0.911057710647583]}), ('best_model', {'source_test/loss': [0.23772653937339783, 0.22842848300933838, 0.2115146517753601], 'source_test/accuracy': [0.932

In [7]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
# wandb.init(project=project_name, name=f'{domain}_mean_results')
# for scenario in mean_results:
#     for key, value in mean_results[scenario].items():
#         wandb.log({f"{scenario}/{key}": value})
#         wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
# wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

Mean Results: {'last_epoch': {'source_test/loss': 0.22346424063046774, 'source_test/accuracy': 0.9294872283935547, 'source_test/f1': 0.9298779765764872, 'source_test/f1_macro': 0.9235719243685404, 'source_test/f1_micro': 0.9294872283935547, 'target_test/loss': 0.30264464020729065, 'target_test/accuracy': 0.9134615659713745, 'target_test/f1': 0.913117527961731, 'target_test/f1_macro': 0.9101854165395101, 'target_test/f1_micro': 0.9134615659713745}, 'best_model': {'source_test/loss': 0.2258898913860321, 'source_test/accuracy': 0.9214743971824646, 'source_test/f1': 0.921875, 'source_test/f1_macro': 0.9148579239845276, 'source_test/f1_micro': 0.9214743971824646, 'target_test/loss': 0.30157971382141113, 'target_test/accuracy': 0.9046474496523539, 'target_test/f1': 0.9044577479362488, 'target_test/f1_macro': 0.9012651642163595, 'target_test/f1_micro': 0.9046474496523539}, 'epoch_saved': {'source_test/loss': 0.2067476511001587, 'source_test/accuracy': 0.9278846581776937, 'source_test/f1': 0.9

In [8]:
print('dones')

dones


In [9]:
best_val_loss

inf