In [1]:

import torch
import gc
torch.cuda.empty_cache()
gc.collect()


0

In [2]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Initialize the console

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
import torchmetrics

install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

#dataset = load_from_disk(f"../{config.Config.DATASETS_SAVE_PATH}/datasets")


['/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages', '/tmp/tmpx56y_bgm', '/home/guest/Desktop/projects/third-experiments/SDA_experiments/modules']


2024-09-29 23:02:34.054046: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-29 23:02:34.086437: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




./text-files/
./hp-model-


In [3]:
import torch
import os
import pytorch_lightning as pl
from transformers import  AutoConfig, DataCollatorForLanguageModeling
from collections import defaultdict
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import torchmetrics

class JointDomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams,source_dataset_length,target_dataset_length):
        super(JointDomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)

        # Load config with hidden states output
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)

        # Set reduction factor and leave_out layers
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        self.leave_out = self.hparams.get("leave_out", [])
        # if self.leave_out != "None":
        #     self.leave_out = self.leave_out.split(",")
        #     self.leave_out = [int(i) for i in self.leave_out]
        # else:
        #     self.leave_out = []

        # Load MLM adapter with head
        self.model.load_adapter(f"{self.hparams['saved_adapter_dir']}/{self.hparams['domain_adapter_name']}", with_head=True)

        # Add classification head for the task
        self.model.add_classification_head(f"{self.hparams['task_adapter_name']}", num_labels=self.hparams["num_classes"])

        # Set active adapters
        self.model.train_adapter(self.hparams['domain_adapter_name'])
        # Calculate alpha based on dataset lengths
        self.alpha = source_dataset_length / (source_dataset_length + target_dataset_length)

        # Initialize loss functions and metrics
        self.criterion = nn.CrossEntropyLoss()
        self.mlm_criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="weighted")
        self.f1_macro = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="macro")
        self.f1_micro = torchmetrics.F1Score(task='multiclass',num_classes=self.hparams["num_classes"], average="micro")

        self.softmax = nn.Softmax(dim=1)
        self.validation_outputs = []
        self.test_outputs = []
        # Optimizer related variables
        self.learning_rate = self.hparams.get("learning_rate", 1e-4)
        self.scheduler_factor = self.hparams.get("scheduler_factor", 0.1)
        self.scheduler_patience = self.hparams.get("scheduler_patience", 0.05)
        self.scheduler_threshold = self.hparams.get("scheduler_threshold", 0.0001)
        self.scheduler_cooldown = self.hparams.get("scheduler_cooldown", 0)
        self.scheduler_eps = self.hparams.get("scheduler_eps", 1e-8)

    def forward(self, input_ids, attention_mask=None, labels=None, task=None):
        if task == "mlm":
            self.model.active_head= self.hparams['domain_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        elif task == "classification":
            self.model.active_head= self.hparams['task_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        else:
            raise ValueError("Task must be either 'mlm' or 'classification'.")
        return outputs

    def training_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha

        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))

        metrics = {
            "train/accuracy": accuracy,
            "train/f1": f1,
            "train/taskclf_loss": task_loss,
            "train/loss": loss,
            "train/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return loss

    def validation_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha
        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        self.validation_outputs.append({
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
                })
        
        metrics = {
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return metrics
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        outputs= self.validation_outputs

        avg_loss = torch.stack([x["val/loss"] for x in outputs]).mean()
        avg_task_loss = torch.stack([x["val/taskclf_loss"] for x in outputs]).mean()
        avg_mlm_loss = torch.stack([x["val/mlm_loss"] for x in outputs]).mean()
        avg_accuracy = torch.stack([x["val/accuracy"] for x in outputs]).mean()
        avg_f1 = torch.stack([x["val/f1"] for x in outputs]).mean()
        print(f"val/accuracy: {avg_accuracy}")
        print(f"val/f1: {avg_f1}")
        print(f"val/taskclf_loss: {avg_task_loss}")
        print(f"val/loss: {avg_loss}")
        print(f"val/mlm_loss: {avg_mlm_loss}")
        metrics = {
            "val/avg_loss": avg_loss,
            "val/avg_taskclf_loss": avg_task_loss,
            "val/avg_mlm_loss": avg_mlm_loss,
            "val/avg_accuracy": avg_accuracy,
            "val/avg_f1": avg_f1,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
        self.log("val_loss", avg_loss)

    def test_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        target_labels = batch["label_target"]

        # Classification task for source data
        cls_outputs_source = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits_source = cls_outputs_source.logits
        task_loss_source = self.criterion(cls_logits_source, source_labels)

        # Classification task for target data
        cls_outputs_target = self(input_ids=target_input_ids, attention_mask=target_attention_mask, task="classification")
        cls_logits_target = cls_outputs_target.logits
        task_loss_target = self.criterion(cls_logits_target, target_labels)

        # Combine losses (though typically you would evaluate them separately)
        loss = task_loss_source + task_loss_target

        accuracy_source = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_source = self.f1(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_macro_source = self.f1_macro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_micro_source = self.f1_micro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
    
        accuracy_target = self.accuracy(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_target = self.f1(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_macro_target = self.f1_macro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_micro_target = self.f1_micro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))

        metrics = {
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        self.test_outputs.append({
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        })
        return metrics
    def on_test_epoch_start(self):
        self.test_outputs = []

    def on_test_epoch_end(self):
        outputs=  self.test_outputs

        avg_loss_source = torch.stack([x["source_test/loss"] for x in outputs]).mean()
        avg_task_loss_target = torch.stack([x["target_test/loss"] for x in outputs]).mean()
        avg_accuracy_source = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
        avg_f1_source = torch.stack([x["source_test/f1"] for x in outputs]).mean()
        avg_f1_macro_source = torch.stack([x["source_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_source = torch.stack([x["source_test/f1_micro"] for x in outputs]).mean()

        avg_accuracy_target = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
        avg_f1_target = torch.stack([x["target_test/f1"] for x in outputs]).mean()
        avg_f1_macro_target = torch.stack([x["target_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_target = torch.stack([x["target_test/f1_micro"] for x in outputs]).mean()

        metrics = {
            "source_test/loss": avg_loss_source,
            "target_test/loss": avg_task_loss_target,
            "source_test/accuracy": avg_accuracy_source,
            "source_test/f1": avg_f1_source,
            "source_test/f1_macro": avg_f1_macro_source,
            "source_test/f1_micro": avg_f1_micro_source,
            "target_test/accuracy": avg_accuracy_target,
            "target_test/f1": avg_f1_target,
            "target_test/f1_macro": avg_f1_macro_target,
            "target_test/f1_micro": avg_f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"])
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, cooldown=0, min_lr=1e-8),
            'monitor': 'val_loss'
        }
        return [optimizer], [lr_scheduler]


In [4]:
import wandb

wandb.login()
# Wandb setup and training loop
seeds = [10, 100, 1000]  # List of seeds
project_name = 'SDA_mixed_edited'  # Replace with your wandb project name
domain = 'CA'  # Replace with the specific domain for this notebook
type = 'union'  # Replace with the specific type for this notebook

# Initialize results dictionary
results = {
    "last_epoch": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "best_model": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "epoch_saved": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""

[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
reload(processed)
for seed in seeds:
    #wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "camera_photo_apparel",
            "dataset_cache_dir": "./../../datasets",
            "pretrained_model_name": "bert-base-uncased",
            "padding": True,
            "source_domain": "camera_photo",
            "target_domain": "apparel",
            "domain_adapter_name": "mlm_union_apparel",
            "task_adapter_name": "CAUni",
            "padding": "max_length",
            "max_seq_length": 512,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 6  # Save model at the 3rd epoch
        
        dm = processed.DataModuleSourceTargetMixed(hparams)
        dm.setup('fit')
        dm.setup("test")
        source_length, target_length = dm.get_dataset_lengths()
        print(f"Source dataset length: {source_length}")
        print(f"Target dataset length: {target_length}")
        model = JointDomainTaskAdapter(hparams,source_length,target_length)

        checkpoint_callback = ModelCheckpoint(
            filename="task-CAUni-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
            filename="CAUni-{epoch:02d}",
            every_n_epochs=save_epoch_3,
            save_top_k=-1,
        )

        #wandb_logger = WandbLogger()

    except Exception as e:
        print(f"Error during preprocessing : {e}")

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=10,
            accelerator="auto",
            #precision=16,
            
            default_root_dir="checkpoints",
            #logger=wandb_logger,
            callbacks=[checkpoint_callback, save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
        )

        trainer.fit(model, train_loader, val_loader)
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")

        best_model = JointDomainTaskAdapter.load_from_checkpoint(best_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = JointDomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)

    except Exception as e:
        print(f"Error during testing: {e}")

    #wandb.finish()

Seed set to 10




Source dataset length: 1437
Target dataset length: 1440




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.5 M     Trainable params
109 M     Non-trainable params
119 M     Total params
476.013   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.484375
val/f1: 0.6512866020202637
val/taskclf_loss: 1.1062952280044556
val/loss: 1.5643718242645264
val/mlm_loss: 2.021494150161743


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.893750011920929
val/f1: 0.8942413330078125
val/taskclf_loss: 0.23711371421813965
val/loss: 1.304153561592102
val/mlm_loss: 2.3689706325531006


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9313793182373047
val/taskclf_loss: 0.2015775740146637
val/loss: 1.1430715322494507
val/mlm_loss: 2.082604169845581


Validation: |                                                                                                 …

val/accuracy: 0.9437500238418579
val/f1: 0.9441343545913696
val/taskclf_loss: 0.20374846458435059
val/loss: 1.1359519958496094
val/mlm_loss: 2.066213369369507


Validation: |                                                                                                 …

val/accuracy: 0.918749988079071
val/f1: 0.9194253087043762
val/taskclf_loss: 0.23880448937416077
val/loss: 1.2122801542282104
val/mlm_loss: 2.183727979660034


Validation: |                                                                                                 …

val/accuracy: 0.9125000238418579
val/f1: 0.9130553603172302
val/taskclf_loss: 0.3022633492946625
val/loss: 1.2566715478897095
val/mlm_loss: 2.2090914249420166


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9317677617073059
val/taskclf_loss: 0.2642107903957367
val/loss: 1.1884516477584839
val/mlm_loss: 2.110767126083374


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.931330680847168
val/taskclf_loss: 0.22728684544563293
val/loss: 1.1930140256881714
val/mlm_loss: 2.156729221343994


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.931330680847168
val/taskclf_loss: 0.236845001578331
val/loss: 1.1515347957611084
val/mlm_loss: 2.064318895339966


Validation: |                                                                                                 …

val/accuracy: 0.9375
val/f1: 0.9375
val/taskclf_loss: 0.23601002991199493
val/loss: 1.1975306272506714
val/mlm_loss: 2.157047986984253


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.9375
val/f1: 0.9375
val/taskclf_loss: 0.23566599190235138
val/loss: 1.1504194736480713
val/mlm_loss: 2.063267230987549


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_30/checkpoints/task-CAUni-epoch=02-val_loss=1.14.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_30/checkpoints/CAUni-epoch=05.ckpt


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.29498565196990967, 'source_test/accuracy': 0.9230769872665405, 'source_test/f1': 0.922988772392273, 'source_test/f1_macro': 0.9213860034942627, 'source_test/f1_micro': 0.9230769872665405, 'target_test/loss': 0.3614310026168823, 'target_test/accuracy': 0.911057710647583, 'target_test/f1': 0.9106193780899048, 'target_test/f1_macro': 0.9080077409744263, 'target_test/f1_micro': 0.911057710647583}]
Best checkpoint path: checkpoints/lightning_logs/version_30/checkpoints/task-CAUni-epoch=02-val_loss=1.14.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_30/checkpoints/CAUni-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.2834438979625702, 'source_test/accuracy': 0.9014423489570618, 'source_test/f1': 0.9013609290122986, 'source_test/f1_macro': 0.8988259434700012, 'source_test/f1_micro': 0.9014423489570618, 'target_test/loss': 0.2971420884132385, 'target_test/accuracy': 0.915865421295166, 'target_test/f1': 0.9157513380050659, 'target_test/f1_macro': 0.9123625755310059, 'target_test/f1_micro': 0.915865421295166}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 100


Test Results on saved epoch: [{'source_test/loss': 0.3138881325721741, 'source_test/accuracy': 0.920673131942749, 'source_test/f1': 0.9205920696258545, 'source_test/f1_macro': 0.9189750552177429, 'source_test/f1_micro': 0.920673131942749, 'target_test/loss': 0.36943352222442627, 'target_test/accuracy': 0.9062500596046448, 'target_test/f1': 0.9055309891700745, 'target_test/f1_macro': 0.9036316871643066, 'target_test/f1_micro': 0.9062500596046448}]


Source dataset length: 1437
Target dataset length: 1440




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.5 M     Trainable params
109 M     Non-trainable params
119 M     Total params
476.013   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.3125
val/f1: 0.3497768044471741
val/taskclf_loss: 1.1205947399139404
val/loss: 1.572704553604126
val/mlm_loss: 2.0238723754882812


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.9125000238418579
val/f1: 0.9126600623130798
val/taskclf_loss: 0.237981915473938
val/loss: 1.1667943000793457
val/mlm_loss: 2.0936717987060547


Validation: |                                                                                                 …

val/accuracy: 0.925000011920929
val/f1: 0.9254782795906067
val/taskclf_loss: 0.2193564474582672
val/loss: 1.1715655326843262
val/mlm_loss: 2.121791124343872


Validation: |                                                                                                 …

val/accuracy: 0.925000011920929
val/f1: 0.9256812334060669
val/taskclf_loss: 0.2740812599658966
val/loss: 1.1981487274169922
val/mlm_loss: 2.120291233062744


Validation: |                                                                                                 …

val/accuracy: 0.9375
val/f1: 0.9375162124633789
val/taskclf_loss: 0.24036717414855957
val/loss: 1.1418564319610596
val/mlm_loss: 2.0414676666259766


Validation: |                                                                                                 …

val/accuracy: 0.9125000238418579
val/f1: 0.9132302403450012
val/taskclf_loss: 0.3202182352542877
val/loss: 1.2097630500793457
val/mlm_loss: 2.097454786300659


Validation: |                                                                                                 …

val/accuracy: 0.9312500357627869
val/f1: 0.9313449859619141
val/taskclf_loss: 0.2605912387371063
val/loss: 1.2091600894927979
val/mlm_loss: 2.1557528972625732


Validation: |                                                                                                 …

val/accuracy: 0.918749988079071
val/f1: 0.9198951721191406
val/taskclf_loss: 0.3505517542362213
val/loss: 1.1989152431488037
val/mlm_loss: 2.045511245727539


Validation: |                                                                                                 …

val/accuracy: 0.918749988079071
val/f1: 0.9187942743301392
val/taskclf_loss: 0.280112624168396
val/loss: 1.1624412536621094
val/mlm_loss: 2.0429317951202393


Validation: |                                                                                                 …

val/accuracy: 0.925000011920929
val/f1: 0.9250501990318298
val/taskclf_loss: 0.27560701966285706
val/loss: 1.1545474529266357
val/mlm_loss: 2.0316569805145264


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.9125000238418579
val/f1: 0.9126788973808289
val/taskclf_loss: 0.2854309678077698
val/loss: 1.1446164846420288
val/mlm_loss: 2.002012014389038


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_31/checkpoints/task-CAUni-epoch=03-val_loss=1.14.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_31/checkpoints/CAUni-epoch=05.ckpt


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.3513559401035309, 'source_test/accuracy': 0.9182692766189575, 'source_test/f1': 0.9182182550430298, 'source_test/f1_macro': 0.9161407947540283, 'source_test/f1_micro': 0.9182692766189575, 'target_test/loss': 0.39732396602630615, 'target_test/accuracy': 0.9134615659713745, 'target_test/f1': 0.9128344058990479, 'target_test/f1_macro': 0.9105923175811768, 'target_test/f1_micro': 0.9134615659713745}]
Best checkpoint path: checkpoints/lightning_logs/version_31/checkpoints/task-CAUni-epoch=03-val_loss=1.14.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_31/checkpoints/CAUni-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.34636953473091125, 'source_test/accuracy': 0.8966346383094788, 'source_test/f1': 0.896747887134552, 'source_test/f1_macro': 0.8933265209197998, 'source_test/f1_micro': 0.8966346383094788, 'target_test/loss': 0.36135560274124146, 'target_test/accuracy': 0.8990384936332703, 'target_test/f1': 0.8987895846366882, 'target_test/f1_macro': 0.8953178524971008, 'target_test/f1_micro': 0.8990384936332703}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 1000


Test Results on saved epoch: [{'source_test/loss': 0.3403528928756714, 'source_test/accuracy': 0.915865421295166, 'source_test/f1': 0.915905237197876, 'source_test/f1_macro': 0.9140812158584595, 'source_test/f1_micro': 0.915865421295166, 'target_test/loss': 0.39794766902923584, 'target_test/accuracy': 0.9086538553237915, 'target_test/f1': 0.9080341458320618, 'target_test/f1_macro': 0.9057877063751221, 'target_test/f1_micro': 0.9086538553237915}]


Source dataset length: 1437
Target dataset length: 1440




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.5 M     Trainable params
109 M     Non-trainable params
119 M     Total params
476.013   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.109375
val/f1: 0.050680678337812424
val/taskclf_loss: 1.1286402940750122
val/loss: 1.6726921796798706
val/mlm_loss: 2.2156105041503906


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.8812500238418579
val/f1: 0.8827654123306274
val/taskclf_loss: 0.2588697671890259
val/loss: 1.2233213186264038
val/mlm_loss: 2.1857638359069824


Validation: |                                                                                                 …

val/accuracy: 0.9375
val/f1: 0.9376373291015625
val/taskclf_loss: 0.2075955718755722
val/loss: 1.1515392065048218
val/mlm_loss: 2.0935161113739014


Validation: |                                                                                                 …

val/accuracy: 0.9375
val/f1: 0.937735378742218
val/taskclf_loss: 0.2226121872663498
val/loss: 1.193665862083435
val/mlm_loss: 2.162696599960327


Validation: |                                                                                                 …

val/accuracy: 0.8812500238418579
val/f1: 0.8843016624450684
val/taskclf_loss: 0.4082588851451874
val/loss: 1.2130343914031982
val/mlm_loss: 2.0161335468292236


Validation: |                                                                                                 …

val/accuracy: 0.9125000238418579
val/f1: 0.9132302403450012
val/taskclf_loss: 0.2694471776485443
val/loss: 1.160921335220337
val/mlm_loss: 2.0505383014678955


Validation: |                                                                                                 …

val/accuracy: 0.949999988079071
val/f1: 0.9501372575759888
val/taskclf_loss: 0.2022930383682251
val/loss: 1.1419146060943604
val/mlm_loss: 2.079578399658203


Validation: |                                                                                                 …

val/accuracy: 0.956250011920929
val/f1: 0.9564056396484375
val/taskclf_loss: 0.20049349963665009
val/loss: 1.152358055114746
val/mlm_loss: 2.1022396087646484


Validation: |                                                                                                 …

val/accuracy: 0.956250011920929
val/f1: 0.9564056396484375
val/taskclf_loss: 0.19963215291500092
val/loss: 1.1557811498641968
val/mlm_loss: 2.109938144683838


Validation: |                                                                                                 …

val/accuracy: 0.9437500238418579
val/f1: 0.9439823031425476
val/taskclf_loss: 0.20763826370239258
val/loss: 1.184435248374939
val/mlm_loss: 2.1591973304748535


Validation: |                                                                                                 …

val/accuracy: 0.949999988079071
val/f1: 0.9501372575759888
val/taskclf_loss: 0.20524516701698303
val/loss: 1.0878170728683472
val/mlm_loss: 1.9685505628585815


`Trainer.fit` stopped: `max_epochs=10` reached.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_32/checkpoints/task-CAUni-epoch=09-val_loss=1.09.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_32/checkpoints/CAUni-epoch=05.ckpt


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.29205456376075745, 'source_test/accuracy': 0.915865421295166, 'source_test/f1': 0.9157369136810303, 'source_test/f1_macro': 0.9137505292892456, 'source_test/f1_micro': 0.915865421295166, 'target_test/loss': 0.3283419609069824, 'target_test/accuracy': 0.9134615659713745, 'target_test/f1': 0.913070797920227, 'target_test/f1_macro': 0.9105216860771179, 'target_test/f1_micro': 0.9134615659713745}]
Best checkpoint path: checkpoints/lightning_logs/version_32/checkpoints/task-CAUni-epoch=09-val_loss=1.09.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_32/checkpoints/CAUni-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.29205456376075745, 'source_test/accuracy': 0.915865421295166, 'source_test/f1': 0.9157369136810303, 'source_test/f1_macro': 0.9137505292892456, 'source_test/f1_micro': 0.915865421295166, 'target_test/loss': 0.3283419609069824, 'target_test/accuracy': 0.9134615659713745, 'target_test/f1': 0.913070797920227, 'target_test/f1_macro': 0.9105216860771179, 'target_test/f1_micro': 0.9134615659713745}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on saved epoch: [{'source_test/loss': 0.2871432304382324, 'source_test/accuracy': 0.9134615659713745, 'source_test/f1': 0.9133260250091553, 'source_test/f1_macro': 0.9113537073135376, 'source_test/f1_micro': 0.9134615659713745, 'target_test/loss': 0.3240874111652374, 'target_test/accuracy': 0.911057710647583, 'target_test/f1': 0.9106267094612122, 'target_test/f1_macro': 0.9081457257270813, 'target_test/f1_micro': 0.911057710647583}]


In [6]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.29498565196990967, 0.3513559401035309, 0.29205456376075745], 'source_test/accuracy': [0.9230769872665405, 0.9182692766189575, 0.915865421295166], 'source_test/f1': [0.922988772392273, 0.9182182550430298, 0.9157369136810303], 'source_test/f1_macro': [0.9213860034942627, 0.9161407947540283, 0.9137505292892456], 'source_test/f1_micro': [0.9230769872665405, 0.9182692766189575, 0.915865421295166], 'target_test/loss': [0.3614310026168823, 0.39732396602630615, 0.3283419609069824], 'target_test/accuracy': [0.911057710647583, 0.9134615659713745, 0.9134615659713745], 'target_test/f1': [0.9106193780899048, 0.9128344058990479, 0.913070797920227], 'target_test/f1_macro': [0.9080077409744263, 0.9105923175811768, 0.9105216860771179], 'target_test/f1_micro': [0.911057710647583, 0.9134615659713745, 0.9134615659713745]}), ('best_model', {'source_test/loss': [0.2834438979625702, 0.34636953473091125, 0.29205456376075745], 'source_test/accuracy': [0.901442

In [7]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
# wandb.init(project=project_name, name=f'{domain}_mean_results')
# for scenario in mean_results:
#     for key, value in mean_results[scenario].items():
#         wandb.log({f"{scenario}/{key}": value})
#         wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
# wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

Mean Results: {'last_epoch': {'source_test/loss': 0.31279871861139935, 'source_test/accuracy': 0.9190705617268881, 'source_test/f1': 0.9189813137054443, 'source_test/f1_macro': 0.9170924425125122, 'source_test/f1_micro': 0.9190705617268881, 'target_test/loss': 0.3623656431833903, 'target_test/accuracy': 0.912660280863444, 'target_test/f1': 0.9121748606363932, 'target_test/f1_macro': 0.909707248210907, 'target_test/f1_micro': 0.912660280863444}, 'best_model': {'source_test/loss': 0.30728933215141296, 'source_test/accuracy': 0.9046474695205688, 'source_test/f1': 0.9046152432759603, 'source_test/f1_macro': 0.9019676645596822, 'source_test/f1_micro': 0.9046474695205688, 'target_test/loss': 0.3289465506871541, 'target_test/accuracy': 0.9094551602999369, 'target_test/f1': 0.9092039068539938, 'target_test/f1_macro': 0.9060673713684082, 'target_test/f1_micro': 0.9094551602999369}, 'epoch_saved': {'source_test/loss': 0.31379475196202594, 'source_test/accuracy': 0.9166667064030966, 'source_test/

In [8]:
print('dones')

dones


In [9]:
best_val_loss

inf