In [1]:

import torch
import gc
torch.cuda.empty_cache()
gc.collect()


0

In [2]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Initialize the console

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
import torchmetrics

install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

dataset = load_from_disk(f"../{config.Config.DATASETS_SAVE_PATH}/datasets")


['/home/guest/Desktop/projects/third-experiments/domain_adaptation_project/mixed/government', '/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages', '/tmp/tmpbgld7gb_', '/home/guest/Desktop/projects/third-experiments/domain_adaptation_project/modules']


2024-08-13 13:40:43.884588: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-13 13:40:43.919663: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


./text-files/
./hp-model-


In [3]:
import torch
import os
import pytorch_lightning as pl
from transformers import  AutoConfig, DataCollatorForLanguageModeling
from collections import defaultdict
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import torchmetrics

class JointDomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams,source_dataset_length,target_dataset_length):
        super(JointDomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)

        # Load config with hidden states output
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)

        # Set reduction factor and leave_out layers
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        self.leave_out = self.hparams.get("leave_out", [])
        # if self.leave_out != "None":
        #     self.leave_out = self.leave_out.split(",")
        #     self.leave_out = [int(i) for i in self.leave_out]
        # else:
        #     self.leave_out = []

        adapter_config = AdapterConfig.load("seq_bn")
        
        self.task_adapter_name = self.hparams["task_adapter_name"]
        self.model.add_adapter(self.task_adapter_name, config=adapter_config)


        # Load MLM adapter with head
        self.model.load_adapter(f"{self.hparams['saved_adapter_dir']}/{self.hparams['domain_adapter_name']}", with_head=True)

        # Add classification head for the task
        self.model.add_classification_head(f"{self.hparams['task_adapter_name']}", num_labels=self.hparams["num_classes"])

        # Set active adapters
        self.model.active_adapters = Stack(self.hparams['domain_adapter_name'], self.task_adapter_name)

        self.model.train_adapter(Stack(self.hparams['domain_adapter_name'], self.task_adapter_name))
        print(self.model.adapter_summary())
        # Calculate alpha based on dataset lengths
        self.alpha = source_dataset_length / (source_dataset_length + target_dataset_length)

        # Initialize loss functions and metrics
        self.criterion = nn.CrossEntropyLoss()
        self.mlm_criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="weighted")
        self.f1_macro = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="macro")
        self.softmax = nn.Softmax(dim=1)
        self.validation_outputs = []
        self.test_outputs = []
        # Optimizer related variables
        self.learning_rate = self.hparams.get("learning_rate", 1e-4)
        self.scheduler_factor = self.hparams.get("scheduler_factor", 0.1)
        self.scheduler_patience = self.hparams.get("scheduler_patience", 0.05)
        self.scheduler_threshold = self.hparams.get("scheduler_threshold", 0.0001)
        self.scheduler_cooldown = self.hparams.get("scheduler_cooldown", 0)
        self.scheduler_eps = self.hparams.get("scheduler_eps", 1e-8)

    def forward(self, input_ids, attention_mask=None, labels=None, task=None):
        if task == "mlm":
            self.model.active_head= self.hparams['domain_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        elif task == "classification":
            self.model.active_head= self.hparams['task_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        else:
            raise ValueError("Task must be either 'mlm' or 'classification'.")
        return outputs

    def training_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha

        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))

        metrics = {
            "train/accuracy": accuracy,
            "train/f1": f1,
            "train/taskclf_loss": task_loss,
            "train/loss": loss,
            "train/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return loss

    def validation_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha
        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        self.validation_outputs.append({
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
                })
        
        metrics = {
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return metrics
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        outputs= self.validation_outputs

        avg_loss = torch.stack([x["val/loss"] for x in outputs]).mean()
        avg_task_loss = torch.stack([x["val/taskclf_loss"] for x in outputs]).mean()
        avg_mlm_loss = torch.stack([x["val/mlm_loss"] for x in outputs]).mean()
        avg_accuracy = torch.stack([x["val/accuracy"] for x in outputs]).mean()
        avg_f1 = torch.stack([x["val/f1"] for x in outputs]).mean()
        print(f"val/accuracy: {avg_accuracy}")
        print(f"val/f1: {avg_f1}")
        print(f"val/taskclf_loss: {avg_task_loss}")
        print(f"val/loss: {avg_loss}")
        print(f"val/mlm_loss: {avg_mlm_loss}")
        metrics = {
            "val/avg_loss": avg_loss,
            "val/avg_taskclf_loss": avg_task_loss,
            "val/avg_mlm_loss": avg_mlm_loss,
            "val/avg_accuracy": avg_accuracy,
            "val/avg_f1": avg_f1,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
        self.log("val_loss", avg_loss)

    def test_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        target_labels = batch["label_target"]

        # Classification task for source data
        cls_outputs_source = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits_source = cls_outputs_source.logits
        task_loss_source = self.criterion(cls_logits_source, source_labels)

        # Classification task for target data
        cls_outputs_target = self(input_ids=target_input_ids, attention_mask=target_attention_mask, task="classification")
        cls_logits_target = cls_outputs_target.logits
        task_loss_target = self.criterion(cls_logits_target, target_labels)

        # Combine losses (though typically you would evaluate them separately)
        loss = task_loss_source + task_loss_target

        accuracy_source = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_source = self.f1(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_macro_source = self.f1_macro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))

        accuracy_target = self.accuracy(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_target = self.f1(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_macro_target = self.f1_macro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))

        metrics = {
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        self.test_outputs.append({
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
        })
        return metrics
    def on_test_epoch_start(self):
        self.test_outputs = []

    def on_test_epoch_end(self):
        outputs=  self.test_outputs

        avg_loss_source = torch.stack([x["source_test/loss"] for x in outputs]).mean()
        avg_task_loss_target = torch.stack([x["target_test/loss"] for x in outputs]).mean()
        avg_accuracy_source = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
        avg_f1_source = torch.stack([x["source_test/f1"] for x in outputs]).mean()
        avg_f1_macro_source = torch.stack([x["source_test/f1_macro"] for x in outputs]).mean()

        avg_accuracy_target = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
        avg_f1_target = torch.stack([x["target_test/f1"] for x in outputs]).mean()
        avg_f1_macro_target = torch.stack([x["target_test/f1_macro"] for x in outputs]).mean()

        metrics = {
            "source_test/loss": avg_loss_source,
            "target_test/loss": avg_task_loss_target,
            "source_test/accuracy": avg_accuracy_source,
            "source_test/f1": avg_f1_source,
            "source_test/f1_macro": avg_f1_macro_source,
            "target_test/accuracy": avg_accuracy_target,
            "target_test/f1": avg_f1_target,
            "target_test/f1_macro": avg_f1_macro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.learning_rate)
        lr_scheduler = ReduceLROnPlateau(
            optimizer=optimizer,
            mode="min",
            factor=self.scheduler_factor,
            patience=self.scheduler_patience,
            threshold=self.scheduler_threshold,
            cooldown=self.scheduler_cooldown,
            eps=self.scheduler_eps,
            verbose=True,
        )
        return [optimizer], [{"scheduler": lr_scheduler, "reduce_lr_on_plateau": True, "monitor": "val_loss", "interval": "epoch"}]


In [4]:
import wandb

wandb.login()
# Wandb setup and training loop
seeds = [42, 10, 100]  # List of seeds
project_name = 'mixed_edited'  # Replace with your wandb project name
domain = 'STR'  # Replace with the specific domain for this notebook
type = 'adapter'  # Replace with the specific type for this notebook

# Initialize results dictionary
results = {
    "last_epoch": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
    },
    "best_model": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
    },
    "epoch_saved": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
reload(processed)
for seed in seeds:
    wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "slate_travel",
            "source_domain": "slate",
            "target_domain": "travel",
            "domain_adapter_name": "mlm_adapter_TR",
            "task_adapter_name": "task_STR",
            "pretrained_model_name": "bert-base-uncased",
            "padding": "max_length",
            "max_seq_length": 128,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 6  # Save model at the 3rd epoch
        
        dm = processed.DataModuleSourceTargetMixed(hparams)
        dm.setup('fit')
        dm.setup("test")
        source_length, target_length = dm.get_dataset_lengths()
        print(f"Source dataset length: {source_length}")
        print(f"Target dataset length: {target_length}")
        model = JointDomainTaskAdapter(hparams,source_length,target_length)

        checkpoint_callback = ModelCheckpoint(
            filename="task-STR-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
            filename="STR-{epoch:02d}",
            every_n_epochs=save_epoch_3,
            save_top_k=-1,
        )

        wandb_logger = WandbLogger()

    except Exception as e:
        print(f"Error during preprocessing : {e}")

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=10,
            accelerator="auto",
            precision=16,
            
            default_root_dir="checkpoints",
            logger=wandb_logger,
            callbacks=[checkpoint_callback, save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
        )

        trainer.fit(model, train_loader, val_loader)
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")

        best_model = JointDomainTaskAdapter.load_from_checkpoint(best_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = JointDomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)

    except Exception as e:
        print(f"Error during testing: {e}")

    wandb.finish()

Seed set to 42


Batch size: 32
Source genre: slate
Target genre: travel
Number of target samples: 69615
Source genre: slate
Target genre: travel
Number of target samples: 69615
Source dataset length: 69575
Target dataset length: 24519


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
/home/

Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_STR                 bottleneck          894,528       0.817       1       1
mlm_adapter_TR           bottleneck        7,091,712       6.477       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0
eee


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py:257: Found unsupported keys in the lr scheduler dict: {'reduce_lr_on_plateau'}. HINT: remove them from the output of `configure_optimizers`.

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.8 M     Trainable params
109 M     Non-trainable params
119 M     Total params
477.227   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.25
val/f1: 0.3948001265525818
val/taskclf_loss: 1.1103515625
val/loss: 1.2324225902557373
val/mlm_loss: 1.578810453414917


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7222705483436584
val/f1: 0.722049355506897
val/taskclf_loss: 0.6547640562057495
val/loss: 0.8998483419418335
val/mlm_loss: 1.5952982902526855


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7498571872711182
val/f1: 0.7510141134262085
val/taskclf_loss: 0.6160896420478821
val/loss: 0.8674074411392212
val/mlm_loss: 1.5805456638336182


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.754037082195282
val/f1: 0.7541916370391846
val/taskclf_loss: 0.6229245066642761
val/loss: 0.8721663951873779
val/mlm_loss: 1.579413652420044


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.762301504611969
val/f1: 0.7626084089279175
val/taskclf_loss: 0.6925884485244751
val/loss: 0.9176583290100098
val/mlm_loss: 1.556315302848816


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7644967436790466
val/f1: 0.7643694877624512
val/taskclf_loss: 0.6767385005950928
val/loss: 0.9090414643287659
val/mlm_loss: 1.5682231187820435


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7651424407958984
val/f1: 0.7650908827781677
val/taskclf_loss: 0.6744473576545715
val/loss: 0.9049272537231445
val/mlm_loss: 1.5589361190795898


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7652715444564819
val/f1: 0.7652300000190735
val/taskclf_loss: 0.6741917133331299
val/loss: 0.902003288269043
val/mlm_loss: 1.5484404563903809


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7652715444564819
val/f1: 0.7652300000190735
val/taskclf_loss: 0.6739785671234131
val/loss: 0.9024276733398438
val/mlm_loss: 1.5506740808486938


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7651424407958984
val/f1: 0.7650877833366394
val/taskclf_loss: 0.673761785030365
val/loss: 0.9008792638778687
val/mlm_loss: 1.545346736907959


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.7650132775306702
val/f1: 0.7649500966072083
val/taskclf_loss: 0.6735296249389648
val/loss: 0.9053974747657776
val/mlm_loss: 1.5633443593978882
Best checkpoint path: ./lightning_logs/yr42u2kc/checkpoints/task-STR-epoch=01-val_loss=0.87.ckpt
Saved epoch checkpoint path: ./lightning_logs/yr42u2kc/checkpoints/STR-epoch=05.ckpt
Source genre: slate


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Target genre: travel
Number of target samples: 69615


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results Last Epoch: [{'source_test/loss': 0.7276239395141602, 'source_test/accuracy': 0.7424395084381104, 'source_test/f1': 0.7421094179153442, 'source_test/f1_macro': 0.7310559749603271, 'target_test/loss': 0.5949236750602722, 'target_test/accuracy': 0.7923386693000793, 'target_test/f1': 0.7928553819656372, 'target_test/f1_macro': 0.7816459536552429}]
Best checkpoint path: ./lightning_logs/yr42u2kc/checkpoints/task-STR-epoch=01-val_loss=0.87.ckpt
Saved epoch checkpoint path: ./lightning_logs/yr42u2kc/checkpoints/STR-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_STR                 bottleneck          894,528       0.817       1       1
mlm_adapter_TR           bottleneck        7,091,712       6.477       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on Best Model: [{'source_test/loss': 0.6533322930335999, 'source_test/accuracy': 0.7372311353683472, 'source_test/f1': 0.7371792793273926, 'source_test/f1_macro': 0.72635817527771, 'target_test/loss': 0.5613260865211487, 'target_test/accuracy': 0.7713373899459839, 'target_test/f1': 0.7731791734695435, 'target_test/f1_macro': 0.7598021626472473}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_STR                 bottleneck          894,528       0.817       1       1
mlm_adapter_TR           bottleneck        7,091,712       6.477       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on saved epoch: [{'source_test/loss': 0.7290706634521484, 'source_test/accuracy': 0.7424395084381104, 'source_test/f1': 0.7421094179153442, 'source_test/f1_macro': 0.7310559749603271, 'target_test/loss': 0.5962365865707397, 'target_test/accuracy': 0.7928427457809448, 'target_test/f1': 0.7933055758476257, 'target_test/f1_macro': 0.7822151780128479}]


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█
source_test/accuracy,█▁█
source_test/f1,█▁█
source_test/f1_macro,█▁█
source_test/loss,█▁█
target_test/accuracy,█▁█
target_test/f1,█▁█
target_test/f1_macro,█▁█
target_test/loss,█▁█
train/accuracy,▁▃▄▄▅▅▅▅▅▇▅▅▄▇▄▅▇█▇▄▆▆▄█▄▇▅█▆▇▇▇▄▇▇▇▂▇█▇

0,1
epoch,10.0
source_test/accuracy,0.74244
source_test/f1,0.74211
source_test/f1_macro,0.73106
source_test/loss,0.72907
target_test/accuracy,0.79284
target_test/f1,0.79331
target_test/f1_macro,0.78222
target_test/loss,0.59624
train/accuracy,1.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112720911235859, max=1.0…

Seed set to 10


Batch size: 32
Source genre: slate
Target genre: travel
Number of target samples: 69615
Source genre: slate
Target genre: travel
Number of target samples: 69615
Source dataset length: 69575
Target dataset length: 24519


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
/home/

Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_STR                 bottleneck          894,528       0.817       1       1
mlm_adapter_TR           bottleneck        7,091,712       6.477       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0
eee


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.234375
val/f1: 0.23654352128505707
val/taskclf_loss: 1.1039886474609375
val/loss: 1.2397480010986328
val/mlm_loss: 1.6249778270721436


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7256687879562378
val/f1: 0.7250904440879822
val/taskclf_loss: 0.6489115953445435
val/loss: 0.8920480012893677
val/mlm_loss: 1.5819706916809082


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7458541393280029
val/f1: 0.7462177276611328
val/taskclf_loss: 0.6211041212081909
val/loss: 0.8725849390029907
val/mlm_loss: 1.5861855745315552


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7515835165977478
val/f1: 0.7529037594795227
val/taskclf_loss: 0.6235176920890808
val/loss: 0.8708669543266296
val/mlm_loss: 1.5727438926696777


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7518417835235596
val/f1: 0.7519688010215759
val/taskclf_loss: 0.6457965970039368
val/loss: 0.8853498101234436
val/mlm_loss: 1.5651047229766846


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7614859342575073
val/f1: 0.7618608474731445
val/taskclf_loss: 0.7507979869842529
val/loss: 0.9631682634353638
val/mlm_loss: 1.5657892227172852


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7610101699829102
val/f1: 0.7607399821281433
val/taskclf_loss: 0.7289873361587524
val/loss: 0.9435890913009644
val/mlm_loss: 1.5525418519973755


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7619141340255737
val/f1: 0.7617965340614319
val/taskclf_loss: 0.7259319424629211
val/loss: 0.9406719207763672
val/mlm_loss: 1.5500168800354004


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7620432376861572
val/f1: 0.7619520425796509
val/taskclf_loss: 0.7257725596427917
val/loss: 0.9395610690116882
val/mlm_loss: 1.5462064743041992


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.762301504611969
val/f1: 0.7622100114822388
val/taskclf_loss: 0.7255417704582214
val/loss: 0.9395487308502197
val/mlm_loss: 1.546813726425171


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.7621724009513855
val/f1: 0.762071967124939
val/taskclf_loss: 0.7253279685974121
val/loss: 0.9407249093055725
val/mlm_loss: 1.5519342422485352
Best checkpoint path: ./lightning_logs/y7h0bik4/checkpoints/task-STR-epoch=02-val_loss=0.87.ckpt
Saved epoch checkpoint path: ./lightning_logs/y7h0bik4/checkpoints/STR-epoch=05.ckpt
Source genre: slate
Target genre: travel
Number of target samples: 69615


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results Last Epoch: [{'source_test/loss': 0.7777258157730103, 'source_test/accuracy': 0.7446236610412598, 'source_test/f1': 0.743624746799469, 'source_test/f1_macro': 0.734000027179718, 'target_test/loss': 0.6185091137886047, 'target_test/accuracy': 0.7841061353683472, 'target_test/f1': 0.7846300601959229, 'target_test/f1_macro': 0.7748361229896545}]
Best checkpoint path: ./lightning_logs/y7h0bik4/checkpoints/task-STR-epoch=02-val_loss=0.87.ckpt
Saved epoch checkpoint path: ./lightning_logs/y7h0bik4/checkpoints/STR-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_STR                 bottleneck          894,528       0.817       1       1
mlm_adapter_TR           bottleneck        7,091,712       6.477       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on Best Model: [{'source_test/loss': 0.6624234318733215, 'source_test/accuracy': 0.7399193048477173, 'source_test/f1': 0.7398250699043274, 'source_test/f1_macro': 0.727566123008728, 'target_test/loss': 0.552638828754425, 'target_test/accuracy': 0.7738575339317322, 'target_test/f1': 0.7755113244056702, 'target_test/f1_macro': 0.7626182436943054}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_STR                 bottleneck          894,528       0.817       1       1
mlm_adapter_TR           bottleneck        7,091,712       6.477       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on saved epoch: [{'source_test/loss': 0.7845600247383118, 'source_test/accuracy': 0.7456316947937012, 'source_test/f1': 0.7446166276931763, 'source_test/f1_macro': 0.7349688410758972, 'target_test/loss': 0.6241379976272583, 'target_test/accuracy': 0.7841061353683472, 'target_test/f1': 0.7845883369445801, 'target_test/f1_macro': 0.7746967077255249}]


VBox(children=(Label(value='0.002 MB of 0.018 MB uploaded\r'), FloatProgress(value=0.13787095017696704, max=1.…

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█
source_test/accuracy,▇▁█
source_test/f1,▇▁█
source_test/f1_macro,▇▁█
source_test/loss,█▁█
target_test/accuracy,█▁█
target_test/f1,█▁█
target_test/f1_macro,█▁█
target_test/loss,▇▁█
train/accuracy,▁▃▃▃▂▄▄▃▄▅▄▅▃▅▂▄▆▆▆▅▆▆▃▆▅▇▃█▆▅▆▇▃▆█▆▄▆▇▇

0,1
epoch,10.0
source_test/accuracy,0.74563
source_test/f1,0.74462
source_test/f1_macro,0.73497
source_test/loss,0.78456
target_test/accuracy,0.78411
target_test/f1,0.78459
target_test/f1_macro,0.7747
target_test/loss,0.62414
train/accuracy,1.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112770144568964, max=1.0…

Seed set to 100


Batch size: 32
Source genre: slate
Target genre: travel
Number of target samples: 69615
Source genre: slate
Target genre: travel
Number of target samples: 69615
Source dataset length: 69575
Target dataset length: 24519


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
/home/

Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_STR                 bottleneck          894,528       0.817       1       1
mlm_adapter_TR           bottleneck        7,091,712       6.477       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0
eee


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.234375
val/f1: 0.25246700644493103
val/taskclf_loss: 1.108917236328125
val/loss: 1.2682106494903564
val/mlm_loss: 1.7202204465866089


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7255396246910095
val/f1: 0.7261731624603271
val/taskclf_loss: 0.6477411389350891
val/loss: 0.8883228898048401
val/mlm_loss: 1.5709964036941528


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7426258325576782
val/f1: 0.743028998374939
val/taskclf_loss: 0.618272066116333
val/loss: 0.8671191334724426
val/mlm_loss: 1.5732462406158447


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7504213452339172
val/f1: 0.7501412034034729
val/taskclf_loss: 0.6385982036590576
val/loss: 0.8860843181610107
val/mlm_loss: 1.5883495807647705


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7579517960548401
val/f1: 0.758622944355011
val/taskclf_loss: 0.6985327005386353
val/loss: 0.9235091805458069
val/mlm_loss: 1.561901330947876


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7615267038345337
val/f1: 0.7611809372901917
val/taskclf_loss: 0.6804635524749756
val/loss: 0.9076746702194214
val/mlm_loss: 1.5524077415466309


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.762688934803009
val/f1: 0.7624595761299133
val/taskclf_loss: 0.6774281859397888
val/loss: 0.9055944681167603
val/mlm_loss: 1.5530380010604858


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7630763053894043
val/f1: 0.7628254294395447
val/taskclf_loss: 0.6771578788757324
val/loss: 0.9066040515899658
val/mlm_loss: 1.5576794147491455


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7628180384635925
val/f1: 0.7625654935836792
val/taskclf_loss: 0.6768810153007507
val/loss: 0.9067729711532593
val/mlm_loss: 1.5591133832931519


Validation: |          | 0/? [00:00<?, ?it/s]

val/accuracy: 0.7630763053894043
val/f1: 0.7628439664840698
val/taskclf_loss: 0.6766652464866638
val/loss: 0.9080471992492676
val/mlm_loss: 1.5646154880523682


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.7629472017288208
val/f1: 0.7627230286598206
val/taskclf_loss: 0.6764513254165649
val/loss: 0.9005722403526306
val/mlm_loss: 1.536536693572998
Best checkpoint path: ./lightning_logs/6oobg6kj/checkpoints/task-STR-epoch=01-val_loss=0.87.ckpt
Saved epoch checkpoint path: ./lightning_logs/6oobg6kj/checkpoints/STR-epoch=05.ckpt
Source genre: slate
Target genre: travel
Number of target samples: 69615


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results Last Epoch: [{'source_test/loss': 0.7169814705848694, 'source_test/accuracy': 0.7550402879714966, 'source_test/f1': 0.7543998956680298, 'source_test/f1_macro': 0.7436860203742981, 'target_test/loss': 0.5984389781951904, 'target_test/accuracy': 0.7846101522445679, 'target_test/f1': 0.7851084470748901, 'target_test/f1_macro': 0.7756248712539673}]
Best checkpoint path: ./lightning_logs/6oobg6kj/checkpoints/task-STR-epoch=01-val_loss=0.87.ckpt
Saved epoch checkpoint path: ./lightning_logs/6oobg6kj/checkpoints/STR-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_STR                 bottleneck          894,528       0.817       1       1
mlm_adapter_TR           bottleneck        7,091,712       6.477       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on Best Model: [{'source_test/loss': 0.6739835143089294, 'source_test/accuracy': 0.7263104915618896, 'source_test/f1': 0.726767361164093, 'source_test/f1_macro': 0.7152743935585022, 'target_test/loss': 0.5703954100608826, 'target_test/accuracy': 0.7654569745063782, 'target_test/f1': 0.7654721140861511, 'target_test/f1_macro': 0.7567905783653259}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_STR                 bottleneck          894,528       0.817       1       1
mlm_adapter_TR           bottleneck        7,091,712       6.477       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on saved epoch: [{'source_test/loss': 0.7186063528060913, 'source_test/accuracy': 0.7535281777381897, 'source_test/f1': 0.7528422474861145, 'source_test/f1_macro': 0.7419508695602417, 'target_test/loss': 0.5999500751495361, 'target_test/accuracy': 0.7846101522445679, 'target_test/f1': 0.7851431369781494, 'target_test/f1_macro': 0.7757112383842468}]


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█
source_test/accuracy,█▁█
source_test/f1,█▁█
source_test/f1_macro,█▁█
source_test/loss,█▁█
target_test/accuracy,█▁█
target_test/f1,█▁█
target_test/f1_macro,█▁█
target_test/loss,█▁█
train/accuracy,▁▄▂▂▃▄▃▃▃▆▄▄▃▅▁▅█▇█▄▆▆▃▆▃▆▃▆▆▅▆▅▁▆█▇▅▃██

0,1
epoch,10.0
source_test/accuracy,0.75353
source_test/f1,0.75284
source_test/f1_macro,0.74195
source_test/loss,0.71861
target_test/accuracy,0.78461
target_test/f1,0.78514
target_test/f1_macro,0.77571
target_test/loss,0.59995
train/accuracy,1.0


In [6]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.7276239395141602, 0.7777258157730103, 0.7169814705848694], 'source_test/accuracy': [0.7424395084381104, 0.7446236610412598, 0.7550402879714966], 'source_test/f1': [0.7421094179153442, 0.743624746799469, 0.7543998956680298], 'source_test/f1_macro': [0.7310559749603271, 0.734000027179718, 0.7436860203742981], 'target_test/loss': [0.5949236750602722, 0.6185091137886047, 0.5984389781951904], 'target_test/accuracy': [0.7923386693000793, 0.7841061353683472, 0.7846101522445679], 'target_test/f1': [0.7928553819656372, 0.7846300601959229, 0.7851084470748901], 'target_test/f1_macro': [0.7816459536552429, 0.7748361229896545, 0.7756248712539673]}), ('best_model', {'source_test/loss': [0.6533322930335999, 0.6624234318733215, 0.6739835143089294], 'source_test/accuracy': [0.7372311353683472, 0.7399193048477173, 0.7263104915618896], 'source_test/f1': [0.7371792793273926, 0.7398250699043274, 0.726767361164093], 'source_test/f1_macro': [0.72635817527771

In [7]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
wandb.init(project=project_name, name=f'{domain}_mean_results')
for scenario in mean_results:
    for key, value in mean_results[scenario].items():
        wandb.log({f"{scenario}/{key}": value})
        wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

VBox(children=(Label(value='0.010 MB of 0.010 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
best_model/source_test/accuracy,▁
best_model/source_test/accuracy_std,▁
best_model/source_test/f1,▁
best_model/source_test/f1_macro,▁
best_model/source_test/f1_macro_std,▁
best_model/source_test/f1_std,▁
best_model/source_test/loss,▁
best_model/source_test/loss_std,▁
best_model/target_test/accuracy,▁
best_model/target_test/accuracy_std,▁

0,1
best_model/source_test/accuracy,0.73449
best_model/source_test/accuracy_std,0.00588
best_model/source_test/f1,0.73459
best_model/source_test/f1_macro,0.72307
best_model/source_test/f1_macro_std,0.00553
best_model/source_test/f1_std,0.00564
best_model/source_test/loss,0.66325
best_model/source_test/loss_std,0.00845
best_model/target_test/accuracy,0.77022
best_model/target_test/accuracy_std,0.00352


Mean Results: {'last_epoch': {'source_test/loss': 0.7407770752906799, 'source_test/accuracy': 0.7473678191502889, 'source_test/f1': 0.7467113534609476, 'source_test/f1_macro': 0.7362473408381144, 'target_test/loss': 0.6039572556813558, 'target_test/accuracy': 0.7870183189709982, 'target_test/f1': 0.78753129641215, 'target_test/f1_macro': 0.777368982632955}, 'best_model': {'source_test/loss': 0.6632464130719503, 'source_test/accuracy': 0.734486977259318, 'source_test/f1': 0.7345905701319376, 'source_test/f1_macro': 0.7230662306149801, 'target_test/loss': 0.5614534417788187, 'target_test/accuracy': 0.7702172994613647, 'target_test/f1': 0.771387537320455, 'target_test/f1_macro': 0.7597369949022929}, 'epoch_saved': {'source_test/loss': 0.7440790136655172, 'source_test/accuracy': 0.7471997936566671, 'source_test/f1': 0.7465227643648783, 'source_test/f1_macro': 0.735991895198822, 'target_test/loss': 0.6067748864491781, 'target_test/accuracy': 0.78718634446462, 'target_test/f1': 0.78767901659

In [8]:
print('dones')

dones


In [9]:
best_val_loss

inf