In [1]:

import torch
import gc
torch.cuda.empty_cache()
gc.collect()


0

In [2]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Initialize the console

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
import torchmetrics

install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

dataset = load_from_disk(f"../{config.Config.DATASETS_SAVE_PATH}/datasets")


['/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages', '/tmp/tmpm84amx1n', '/home/guest/Desktop/projects/third-experiments/domain_adaptation_project/modules']


2024-09-26 04:50:56.192332: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-26 04:50:56.343222: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




./text-files/
./hp-model-


In [3]:
import torch
import os
import pytorch_lightning as pl
from transformers import  AutoConfig, DataCollatorForLanguageModeling
from collections import defaultdict
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import torchmetrics

class JointDomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams,source_dataset_length,target_dataset_length):
        super(JointDomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)

        # Load config with hidden states output
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)

        # Set reduction factor and leave_out layers
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        self.leave_out = self.hparams.get("leave_out", [])
        # if self.leave_out != "None":
        #     self.leave_out = self.leave_out.split(",")
        #     self.leave_out = [int(i) for i in self.leave_out]
        # else:
        #     self.leave_out = []

        # Load MLM adapter with head
        self.model.load_adapter(f"{self.hparams['saved_adapter_dir']}/{self.hparams['domain_adapter_name']}", with_head=True)

        # Add classification head for the task
        self.model.add_classification_head(f"{self.hparams['task_adapter_name']}", num_labels=self.hparams["num_classes"])

        # Set active adapters
        self.model.train_adapter(self.hparams['domain_adapter_name'])
        # Calculate alpha based on dataset lengths
        self.alpha = source_dataset_length / (source_dataset_length + target_dataset_length)

        # Initialize loss functions and metrics
        self.criterion = nn.CrossEntropyLoss()
        self.mlm_criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="weighted")
        self.f1_macro = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="macro")
        self.f1_micro = torchmetrics.F1Score(task='multiclass',num_classes=self.hparams["num_classes"], average="micro")

        self.softmax = nn.Softmax(dim=1)
        self.validation_outputs = []
        self.test_outputs = []
        # Optimizer related variables
        self.learning_rate = self.hparams.get("learning_rate", 1e-4)
        self.scheduler_factor = self.hparams.get("scheduler_factor", 0.1)
        self.scheduler_patience = self.hparams.get("scheduler_patience", 0.05)
        self.scheduler_threshold = self.hparams.get("scheduler_threshold", 0.0001)
        self.scheduler_cooldown = self.hparams.get("scheduler_cooldown", 0)
        self.scheduler_eps = self.hparams.get("scheduler_eps", 1e-8)

    def forward(self, input_ids, attention_mask=None, labels=None, task=None):
        if task == "mlm":
            self.model.active_head= self.hparams['domain_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        elif task == "classification":
            self.model.active_head= self.hparams['task_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        else:
            raise ValueError("Task must be either 'mlm' or 'classification'.")
        return outputs

    def training_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha

        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))

        metrics = {
            "train/accuracy": accuracy,
            "train/f1": f1,
            "train/taskclf_loss": task_loss,
            "train/loss": loss,
            "train/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return loss

    def validation_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha
        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        self.validation_outputs.append({
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
                })
        
        metrics = {
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return metrics
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        outputs= self.validation_outputs

        avg_loss = torch.stack([x["val/loss"] for x in outputs]).mean()
        avg_task_loss = torch.stack([x["val/taskclf_loss"] for x in outputs]).mean()
        avg_mlm_loss = torch.stack([x["val/mlm_loss"] for x in outputs]).mean()
        avg_accuracy = torch.stack([x["val/accuracy"] for x in outputs]).mean()
        avg_f1 = torch.stack([x["val/f1"] for x in outputs]).mean()
        print(f"val/accuracy: {avg_accuracy}")
        print(f"val/f1: {avg_f1}")
        print(f"val/taskclf_loss: {avg_task_loss}")
        print(f"val/loss: {avg_loss}")
        print(f"val/mlm_loss: {avg_mlm_loss}")
        metrics = {
            "val/avg_loss": avg_loss,
            "val/avg_taskclf_loss": avg_task_loss,
            "val/avg_mlm_loss": avg_mlm_loss,
            "val/avg_accuracy": avg_accuracy,
            "val/avg_f1": avg_f1,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
        self.log("val_loss", avg_loss)

    def test_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        target_labels = batch["label_target"]

        # Classification task for source data
        cls_outputs_source = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits_source = cls_outputs_source.logits
        task_loss_source = self.criterion(cls_logits_source, source_labels)

        # Classification task for target data
        cls_outputs_target = self(input_ids=target_input_ids, attention_mask=target_attention_mask, task="classification")
        cls_logits_target = cls_outputs_target.logits
        task_loss_target = self.criterion(cls_logits_target, target_labels)

        # Combine losses (though typically you would evaluate them separately)
        loss = task_loss_source + task_loss_target

        accuracy_source = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_source = self.f1(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_macro_source = self.f1_macro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_micro_source = self.f1_micro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
    
        accuracy_target = self.accuracy(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_target = self.f1(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_macro_target = self.f1_macro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_micro_target = self.f1_micro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))

        metrics = {
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        self.test_outputs.append({
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        })
        return metrics
    def on_test_epoch_start(self):
        self.test_outputs = []

    def on_test_epoch_end(self):
        outputs=  self.test_outputs

        avg_loss_source = torch.stack([x["source_test/loss"] for x in outputs]).mean()
        avg_task_loss_target = torch.stack([x["target_test/loss"] for x in outputs]).mean()
        avg_accuracy_source = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
        avg_f1_source = torch.stack([x["source_test/f1"] for x in outputs]).mean()
        avg_f1_macro_source = torch.stack([x["source_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_source = torch.stack([x["source_test/f1_micro"] for x in outputs]).mean()

        avg_accuracy_target = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
        avg_f1_target = torch.stack([x["target_test/f1"] for x in outputs]).mean()
        avg_f1_macro_target = torch.stack([x["target_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_target = torch.stack([x["target_test/f1_micro"] for x in outputs]).mean()

        metrics = {
            "source_test/loss": avg_loss_source,
            "target_test/loss": avg_task_loss_target,
            "source_test/accuracy": avg_accuracy_source,
            "source_test/f1": avg_f1_source,
            "source_test/f1_macro": avg_f1_macro_source,
            "source_test/f1_micro": avg_f1_micro_source,
            "target_test/accuracy": avg_accuracy_target,
            "target_test/f1": avg_f1_target,
            "target_test/f1_macro": avg_f1_macro_target,
            "target_test/f1_micro": avg_f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"])
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, cooldown=0, min_lr=1e-8),
            'monitor': 'val_loss'
        }
        return [optimizer], [lr_scheduler]


In [4]:
import wandb

wandb.login()
# Wandb setup and training loop
seeds = [42, 10, 100]  # List of seeds
project_name = 'mixed_edited'  # Replace with your wandb project name
domain = 'GTR'  # Replace with the specific domain for this notebook
type = 'unipelt'  # Replace with the specific type for this notebook

# Initialize results dictionary
results = {
    "last_epoch": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "best_model": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "epoch_saved": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""

[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
reload(processed)
for seed in seeds:
    #wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "government_travel",
            "source_domain": "government",
            "target_domain": "travel",
            "domain_adapter_name": "mlm_unipelt_TR",
            "task_adapter_name": "GTRPelt",
            "pretrained_model_name": "bert-base-uncased",
            "padding": "max_length",
            "max_seq_length": 128,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 6  # Save model at the 3rd epoch
        
        dm = processed.DataModuleSourceTargetMixed(hparams)
        dm.setup('fit')
        dm.setup("test")
        source_length, target_length = dm.get_dataset_lengths()
        print(f"Source dataset length: {source_length}")
        print(f"Target dataset length: {target_length}")
        model = JointDomainTaskAdapter(hparams,source_length,target_length)

        checkpoint_callback = ModelCheckpoint(
            filename="task-GTRPelt-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
            filename="GTRPelt-{epoch:02d}",
            every_n_epochs=save_epoch_3,
            save_top_k=-1,
        )

        #wandb_logger = WandbLogger()

    except Exception as e:
        print(f"Error during preprocessing : {e}")

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=10,
            accelerator="auto",
            precision=16,
            
            default_root_dir="checkpoints",
            #logger=wandb_logger,
            callbacks=[checkpoint_callback, save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
        )

        trainer.fit(model, train_loader, val_loader)
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")

        best_model = JointDomainTaskAdapter.load_from_checkpoint(best_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = JointDomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)

    except Exception as e:
        print(f"Error during testing: {e}")

    #wandb.finish()

Seed set to 42




Batch size: 32


Source genre: government
Target genre: travel
Number of target samples: 69615


Source genre: government
Target genre: travel
Number of target samples: 69615


Source dataset length: 69615
Target dataset length: 24519


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/amp.py:55: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


eee



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.359375
val/f1: 0.4715525805950165
val/taskclf_loss: 1.0967330932617188
val/loss: 1.2392339706420898
val/mlm_loss: 1.6438261270523071


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7915356755256653
val/f1: 0.7919949293136597
val/taskclf_loss: 0.5289080142974854
val/loss: 0.8191118836402893
val/mlm_loss: 1.6430667638778687


Validation: |                                                                                                 …

val/accuracy: 0.8076996207237244
val/f1: 0.8071926832199097
val/taskclf_loss: 0.490849107503891
val/loss: 0.7888422608375549
val/mlm_loss: 1.6349126100540161


Validation: |                                                                                                 …

val/accuracy: 0.8210788369178772
val/f1: 0.8211370706558228
val/taskclf_loss: 0.47488096356391907
val/loss: 0.776997447013855
val/mlm_loss: 1.6347746849060059


Validation: |                                                                                                 …

val/accuracy: 0.8222410082817078
val/f1: 0.822088897228241
val/taskclf_loss: 0.468886137008667
val/loss: 0.7736745476722717
val/mlm_loss: 1.639037847518921


Validation: |                                                                                                 …

val/accuracy: 0.8249527812004089
val/f1: 0.8250910043716431
val/taskclf_loss: 0.4659009873867035
val/loss: 0.7647908926010132
val/mlm_loss: 1.6134072542190552


Validation: |                                                                                                 …

val/accuracy: 0.8234537839889526
val/f1: 0.8232301473617554
val/taskclf_loss: 0.47313934564590454
val/loss: 0.771265983581543
val/mlm_loss: 1.6177152395248413


Validation: |                                                                                                 …

val/accuracy: 0.8280519247055054
val/f1: 0.8275828957557678
val/taskclf_loss: 0.47926807403564453
val/loss: 0.7732922434806824
val/mlm_loss: 1.6080937385559082


Validation: |                                                                                                 …

val/accuracy: 0.8264238238334656
val/f1: 0.8262184262275696
val/taskclf_loss: 0.4948214590549469
val/loss: 0.7845540642738342
val/mlm_loss: 1.607170820236206


Validation: |                                                                                                 …

val/accuracy: 0.8354630470275879
val/f1: 0.8353438377380371
val/taskclf_loss: 0.4892076849937439
val/loss: 0.7818591594696045
val/mlm_loss: 1.6127631664276123


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.8354125022888184
val/f1: 0.8352182507514954
val/taskclf_loss: 0.4951605200767517
val/loss: 0.785397469997406
val/mlm_loss: 1.6094461679458618


Best checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/task-GTRPelt-epoch=04-val_loss=0.76.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/GTRPelt-epoch=05.ckpt


Source genre: government
Target genre: travel
Number of target samples: 69615


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.4870728552341461, 'source_test/accuracy': 0.8413978219032288, 'source_test/f1': 0.8408933877944946, 'source_test/f1_macro': 0.8377077579498291, 'source_test/f1_micro': 0.8413978219032288, 'target_test/loss': 0.6360140442848206, 'target_test/accuracy': 0.7864583134651184, 'target_test/f1': 0.786763608455658, 'target_test/f1_macro': 0.7770178914070129, 'target_test/f1_micro': 0.7864583134651184}]
Best checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/task-GTRPelt-epoch=04-val_loss=0.76.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/GTRPelt-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.4681151509284973, 'source_test/accuracy': 0.8200604319572449, 'source_test/f1': 0.8195034861564636, 'source_test/f1_macro': 0.815052330493927, 'source_test/f1_micro': 0.8200604319572449, 'target_test/loss': 0.6111211776733398, 'target_test/accuracy': 0.7646169066429138, 'target_test/f1': 0.7643435597419739, 'target_test/f1_macro': 0.7567611932754517, 'target_test/f1_micro': 0.7646169066429138}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 10


Test Results on saved epoch: [{'source_test/loss': 0.476651132106781, 'source_test/accuracy': 0.815524160861969, 'source_test/f1': 0.8149309754371643, 'source_test/f1_macro': 0.8111061453819275, 'source_test/f1_micro': 0.815524160861969, 'target_test/loss': 0.6140515804290771, 'target_test/accuracy': 0.7622647881507874, 'target_test/f1': 0.7620352506637573, 'target_test/f1_macro': 0.751812219619751, 'target_test/f1_micro': 0.7622647881507874}]
Batch size: 32


Source genre: government
Target genre: travel
Number of target samples: 69615


Source genre: government


Target genre: travel
Number of target samples: 69615
Source dataset length: 69615
Target dataset length: 24519


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


eee


Sanity Checking: |                                                                                            …

val/accuracy: 0.359375
val/f1: 0.4217357933521271
val/taskclf_loss: 1.0959320068359375
val/loss: 1.2561306953430176
val/mlm_loss: 1.7109711170196533


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7856460809707642
val/f1: 0.7855587601661682
val/taskclf_loss: 0.5389004349708557
val/loss: 0.8309357166290283
val/mlm_loss: 1.6600900888442993


Validation: |                                                                                                 …

val/accuracy: 0.8079578876495361
val/f1: 0.8083084225654602
val/taskclf_loss: 0.4951265752315521
val/loss: 0.7901849746704102
val/mlm_loss: 1.627922773361206


Validation: |                                                                                                 …

val/accuracy: 0.8135891556739807
val/f1: 0.813674807548523
val/taskclf_loss: 0.48778828978538513
val/loss: 0.7838695645332336
val/mlm_loss: 1.6245115995407104


Validation: |                                                                                                 …

val/accuracy: 0.8175922632217407
val/f1: 0.8173437118530273
val/taskclf_loss: 0.49027296900749207
val/loss: 0.7855823040008545
val/mlm_loss: 1.624032735824585


Validation: |                                                                                                 …

val/accuracy: 0.821982741355896
val/f1: 0.8216890692710876
val/taskclf_loss: 0.4804445207118988
val/loss: 0.7746163010597229
val/mlm_loss: 1.6098365783691406


Validation: |                                                                                                 …

val/accuracy: 0.8254188299179077
val/f1: 0.8251941204071045
val/taskclf_loss: 0.49653053283691406
val/loss: 0.7898402214050293
val/mlm_loss: 1.6226128339767456


Validation: |                                                                                                 …

val/accuracy: 0.8257275819778442
val/f1: 0.8257147073745728
val/taskclf_loss: 0.4988672137260437
val/loss: 0.7869877219200134
val/mlm_loss: 1.6050273180007935


Validation: |                                                                                                 …

val/accuracy: 0.8248236179351807
val/f1: 0.8246909379959106
val/taskclf_loss: 0.49511831998825073
val/loss: 0.7887561321258545
val/mlm_loss: 1.6224603652954102


Validation: |                                                                                                 …

val/accuracy: 0.829988956451416
val/f1: 0.8298903107643127
val/taskclf_loss: 0.500049352645874
val/loss: 0.7881876230239868
val/mlm_loss: 1.6062774658203125


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.8315384984016418
val/f1: 0.8316806554794312
val/taskclf_loss: 0.5054500699043274
val/loss: 0.7886710166931152
val/mlm_loss: 1.592799425125122


Best checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/task-GTRPelt-epoch=04-val_loss=0.77.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/GTRPelt-epoch=05.ckpt


Source genre: government


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Target genre: travel
Number of target samples: 69615


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.5186325907707214, 'source_test/accuracy': 0.8333333134651184, 'source_test/f1': 0.8323970437049866, 'source_test/f1_macro': 0.8290648460388184, 'source_test/f1_micro': 0.8333333134651184, 'target_test/loss': 0.6318691968917847, 'target_test/accuracy': 0.7881383895874023, 'target_test/f1': 0.7892661094665527, 'target_test/f1_macro': 0.778795063495636, 'target_test/f1_micro': 0.7881383895874023}]
Best checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/task-GTRPelt-epoch=04-val_loss=0.77.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/GTRPelt-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.500343918800354, 'source_test/accuracy': 0.8104838728904724, 'source_test/f1': 0.8098773956298828, 'source_test/f1_macro': 0.8059744834899902, 'source_test/f1_micro': 0.8104838728904724, 'target_test/loss': 0.5952270030975342, 'target_test/accuracy': 0.769993245601654, 'target_test/f1': 0.7688544392585754, 'target_test/f1_macro': 0.7623730301856995, 'target_test/f1_micro': 0.769993245601654}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 100


Test Results on saved epoch: [{'source_test/loss': 0.5025936365127563, 'source_test/accuracy': 0.8170362710952759, 'source_test/f1': 0.816266655921936, 'source_test/f1_macro': 0.8118002414703369, 'source_test/f1_micro': 0.8170362710952759, 'target_test/loss': 0.6092156767845154, 'target_test/accuracy': 0.7777217626571655, 'target_test/f1': 0.7769092917442322, 'target_test/f1_macro': 0.7703200578689575, 'target_test/f1_micro': 0.7777217626571655}]
Batch size: 32


Source genre: government


Target genre: travel
Number of target samples: 69615


Source genre: government
Target genre: travel
Number of target samples: 69615


Source dataset length: 69615
Target dataset length: 24519


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


eee


Sanity Checking: |                                                                                            …

val/accuracy: 0.265625
val/f1: 0.27738991379737854
val/taskclf_loss: 1.1019134521484375
val/loss: 1.245848298072815
val/mlm_loss: 1.6545119285583496


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7888463139533997
val/f1: 0.7883755564689636
val/taskclf_loss: 0.5356494784355164
val/loss: 0.8201437592506409
val/mlm_loss: 1.627887487411499


Validation: |                                                                                                 …

val/accuracy: 0.8110570907592773
val/f1: 0.8109168410301208
val/taskclf_loss: 0.4946911334991455
val/loss: 0.790250837802887
val/mlm_loss: 1.6294118165969849


Validation: |                                                                                                 …

val/accuracy: 0.8207420110702515
val/f1: 0.8205609917640686
val/taskclf_loss: 0.4738260805606842
val/loss: 0.7753918766975403
val/mlm_loss: 1.6316055059432983


Validation: |                                                                                                 …

val/accuracy: 0.8269403576850891
val/f1: 0.8268259763717651
val/taskclf_loss: 0.4719368517398834
val/loss: 0.7713406085968018
val/mlm_loss: 1.6214159727096558


Validation: |                                                                                                 …

val/accuracy: 0.8254693150520325
val/f1: 0.8246536254882812
val/taskclf_loss: 0.4699324667453766
val/loss: 0.7712584733963013
val/mlm_loss: 1.6267914772033691


Validation: |                                                                                                 …

val/accuracy: 0.8282316327095032
val/f1: 0.8280337452888489
val/taskclf_loss: 0.4703378975391388
val/loss: 0.7739005088806152
val/mlm_loss: 1.6357835531234741


Validation: |                                                                                                 …

val/accuracy: 0.8302977681159973
val/f1: 0.8296055793762207
val/taskclf_loss: 0.47827765345573425
val/loss: 0.7748199701309204
val/mlm_loss: 1.6167709827423096


Validation: |                                                                                                 …

val/accuracy: 0.8283607959747314
val/f1: 0.8281058073043823
val/taskclf_loss: 0.48965999484062195
val/loss: 0.7785435914993286
val/mlm_loss: 1.5987497568130493


Validation: |                                                                                                 …

val/accuracy: 0.8383039832115173
val/f1: 0.8381292819976807
val/taskclf_loss: 0.48232853412628174
val/loss: 0.7757577896118164
val/mlm_loss: 1.608870267868042


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.8390787839889526
val/f1: 0.8387489914894104
val/taskclf_loss: 0.48541802167892456
val/loss: 0.7761726975440979
val/mlm_loss: 1.6016916036605835


Best checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/task-GTRPelt-epoch=04-val_loss=0.77.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/GTRPelt-epoch=05.ckpt


Source genre: government
Target genre: travel
Number of target samples: 69615


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.4682563841342926, 'source_test/accuracy': 0.8429099321365356, 'source_test/f1': 0.8426440954208374, 'source_test/f1_macro': 0.837854266166687, 'source_test/f1_micro': 0.8429099321365356, 'target_test/loss': 0.6249379515647888, 'target_test/accuracy': 0.7785617709159851, 'target_test/f1': 0.7782869338989258, 'target_test/f1_macro': 0.7687233686447144, 'target_test/f1_micro': 0.7785617709159851}]
Best checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/task-GTRPelt-epoch=04-val_loss=0.77.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/GTRPelt-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.4664005935192108, 'source_test/accuracy': 0.8222446441650391, 'source_test/f1': 0.8214095234870911, 'source_test/f1_macro': 0.8176438212394714, 'source_test/f1_micro': 0.8222446441650391, 'target_test/loss': 0.608782172203064, 'target_test/accuracy': 0.7538642287254333, 'target_test/f1': 0.7534435391426086, 'target_test/f1_macro': 0.7440335154533386, 'target_test/f1_micro': 0.7538642287254333}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on saved epoch: [{'source_test/loss': 0.4618178904056549, 'source_test/accuracy': 0.828125, 'source_test/f1': 0.8277291059494019, 'source_test/f1_macro': 0.823087215423584, 'source_test/f1_micro': 0.828125, 'target_test/loss': 0.6006073355674744, 'target_test/accuracy': 0.7708333134651184, 'target_test/f1': 0.7714500427246094, 'target_test/f1_macro': 0.7614588141441345, 'target_test/f1_micro': 0.7708333134651184}]


In [6]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.4870728552341461, 0.5186325907707214, 0.4682563841342926], 'source_test/accuracy': [0.8413978219032288, 0.8333333134651184, 0.8429099321365356], 'source_test/f1': [0.8408933877944946, 0.8323970437049866, 0.8426440954208374], 'source_test/f1_macro': [0.8377077579498291, 0.8290648460388184, 0.837854266166687], 'source_test/f1_micro': [0.8413978219032288, 0.8333333134651184, 0.8429099321365356], 'target_test/loss': [0.6360140442848206, 0.6318691968917847, 0.6249379515647888], 'target_test/accuracy': [0.7864583134651184, 0.7881383895874023, 0.7785617709159851], 'target_test/f1': [0.786763608455658, 0.7892661094665527, 0.7782869338989258], 'target_test/f1_macro': [0.7770178914070129, 0.778795063495636, 0.7687233686447144], 'target_test/f1_micro': [0.7864583134651184, 0.7881383895874023, 0.7785617709159851]}), ('best_model', {'source_test/loss': [0.4681151509284973, 0.500343918800354, 0.4664005935192108], 'source_test/accuracy': [0.820060431

In [7]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
# wandb.init(project=project_name, name=f'{domain}_mean_results')
# for scenario in mean_results:
#     for key, value in mean_results[scenario].items():
#         wandb.log({f"{scenario}/{key}": value})
#         wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
# wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

Mean Results: {'last_epoch': {'source_test/loss': 0.4913206100463867, 'source_test/accuracy': 0.8392136891682943, 'source_test/f1': 0.8386448423067728, 'source_test/f1_macro': 0.8348756233851115, 'source_test/f1_micro': 0.8392136891682943, 'target_test/loss': 0.6309403975804647, 'target_test/accuracy': 0.784386157989502, 'target_test/f1': 0.7847722172737122, 'target_test/f1_macro': 0.7748454411824545, 'target_test/f1_micro': 0.784386157989502}, 'best_model': {'source_test/loss': 0.4782865544160207, 'source_test/accuracy': 0.8175963163375854, 'source_test/f1': 0.8169301350911459, 'source_test/f1_macro': 0.8128902117411295, 'source_test/f1_micro': 0.8175963163375854, 'target_test/loss': 0.6050434509913126, 'target_test/accuracy': 0.7628247936566671, 'target_test/f1': 0.7622138460477194, 'target_test/f1_macro': 0.75438924630483, 'target_test/f1_micro': 0.7628247936566671}, 'epoch_saved': {'source_test/loss': 0.4803542196750641, 'source_test/accuracy': 0.8202284773190817, 'source_test/f1':

In [8]:
print('dones')

dones


In [9]:
best_val_loss

inf