In [1]:

import torch
import gc
torch.cuda.empty_cache()
gc.collect()


0

In [2]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Initialize the console

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
import torchmetrics

install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

dataset = load_from_disk(f"../{config.Config.DATASETS_SAVE_PATH}/datasets")


['/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages', '/tmp/tmp58e9d6fy', '/home/guest/Desktop/projects/third-experiments/domain_adaptation_project/modules']


2024-09-15 22:49:45.167645: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-15 22:49:45.315998: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




./text-files/
./hp-model-


In [3]:
import torch
import os
import pytorch_lightning as pl
from transformers import  AutoConfig, DataCollatorForLanguageModeling
from collections import defaultdict
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import torchmetrics

class JointDomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams,source_dataset_length,target_dataset_length):
        super(JointDomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)

        # Load config with hidden states output
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)

        # Set reduction factor and leave_out layers
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        self.leave_out = self.hparams.get("leave_out", [])
        # if self.leave_out != "None":
        #     self.leave_out = self.leave_out.split(",")
        #     self.leave_out = [int(i) for i in self.leave_out]
        # else:
        #     self.leave_out = []

        # Load MLM adapter with head
        self.model.load_adapter(f"{self.hparams['saved_adapter_dir']}/{self.hparams['domain_adapter_name']}", with_head=True)

        # Add classification head for the task
        self.model.add_classification_head(f"{self.hparams['task_adapter_name']}", num_labels=self.hparams["num_classes"])

        # Set active adapters
        self.model.train_adapter(self.hparams['domain_adapter_name'])
        # Calculate alpha based on dataset lengths
        self.alpha = source_dataset_length / (source_dataset_length + target_dataset_length)

        # Initialize loss functions and metrics
        self.criterion = nn.CrossEntropyLoss()
        self.mlm_criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="weighted")
        self.f1_macro = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="macro")
        self.f1_micro = torchmetrics.F1Score(task='multiclass',num_classes=self.hparams["num_classes"], average="micro")

        self.softmax = nn.Softmax(dim=1)
        self.validation_outputs = []
        self.test_outputs = []
        # Optimizer related variables
        self.learning_rate = self.hparams.get("learning_rate", 1e-4)
        self.scheduler_factor = self.hparams.get("scheduler_factor", 0.1)
        self.scheduler_patience = self.hparams.get("scheduler_patience", 0.05)
        self.scheduler_threshold = self.hparams.get("scheduler_threshold", 0.0001)
        self.scheduler_cooldown = self.hparams.get("scheduler_cooldown", 0)
        self.scheduler_eps = self.hparams.get("scheduler_eps", 1e-8)

    def forward(self, input_ids, attention_mask=None, labels=None, task=None):
        if task == "mlm":
            self.model.active_head= self.hparams['domain_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        elif task == "classification":
            self.model.active_head= self.hparams['task_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        else:
            raise ValueError("Task must be either 'mlm' or 'classification'.")
        return outputs

    def training_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha

        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))

        metrics = {
            "train/accuracy": accuracy,
            "train/f1": f1,
            "train/taskclf_loss": task_loss,
            "train/loss": loss,
            "train/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return loss

    def validation_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha
        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        self.validation_outputs.append({
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
                })
        
        metrics = {
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return metrics
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        outputs= self.validation_outputs

        avg_loss = torch.stack([x["val/loss"] for x in outputs]).mean()
        avg_task_loss = torch.stack([x["val/taskclf_loss"] for x in outputs]).mean()
        avg_mlm_loss = torch.stack([x["val/mlm_loss"] for x in outputs]).mean()
        avg_accuracy = torch.stack([x["val/accuracy"] for x in outputs]).mean()
        avg_f1 = torch.stack([x["val/f1"] for x in outputs]).mean()
        print(f"val/accuracy: {avg_accuracy}")
        print(f"val/f1: {avg_f1}")
        print(f"val/taskclf_loss: {avg_task_loss}")
        print(f"val/loss: {avg_loss}")
        print(f"val/mlm_loss: {avg_mlm_loss}")
        metrics = {
            "val/avg_loss": avg_loss,
            "val/avg_taskclf_loss": avg_task_loss,
            "val/avg_mlm_loss": avg_mlm_loss,
            "val/avg_accuracy": avg_accuracy,
            "val/avg_f1": avg_f1,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
        self.log("val_loss", avg_loss)

    def test_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        target_labels = batch["label_target"]

        # Classification task for source data
        cls_outputs_source = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits_source = cls_outputs_source.logits
        task_loss_source = self.criterion(cls_logits_source, source_labels)

        # Classification task for target data
        cls_outputs_target = self(input_ids=target_input_ids, attention_mask=target_attention_mask, task="classification")
        cls_logits_target = cls_outputs_target.logits
        task_loss_target = self.criterion(cls_logits_target, target_labels)

        # Combine losses (though typically you would evaluate them separately)
        loss = task_loss_source + task_loss_target

        accuracy_source = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_source = self.f1(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_macro_source = self.f1_macro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_micro_source = self.f1_micro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
    
        accuracy_target = self.accuracy(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_target = self.f1(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_macro_target = self.f1_macro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_micro_target = self.f1_micro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))

        metrics = {
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        self.test_outputs.append({
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        })
        return metrics
    def on_test_epoch_start(self):
        self.test_outputs = []

    def on_test_epoch_end(self):
        outputs=  self.test_outputs

        avg_loss_source = torch.stack([x["source_test/loss"] for x in outputs]).mean()
        avg_task_loss_target = torch.stack([x["target_test/loss"] for x in outputs]).mean()
        avg_accuracy_source = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
        avg_f1_source = torch.stack([x["source_test/f1"] for x in outputs]).mean()
        avg_f1_macro_source = torch.stack([x["source_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_source = torch.stack([x["source_test/f1_micro"] for x in outputs]).mean()

        avg_accuracy_target = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
        avg_f1_target = torch.stack([x["target_test/f1"] for x in outputs]).mean()
        avg_f1_macro_target = torch.stack([x["target_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_target = torch.stack([x["target_test/f1_micro"] for x in outputs]).mean()

        metrics = {
            "source_test/loss": avg_loss_source,
            "target_test/loss": avg_task_loss_target,
            "source_test/accuracy": avg_accuracy_source,
            "source_test/f1": avg_f1_source,
            "source_test/f1_macro": avg_f1_macro_source,
            "source_test/f1_micro": avg_f1_micro_source,
            "target_test/accuracy": avg_accuracy_target,
            "target_test/f1": avg_f1_target,
            "target_test/f1_macro": avg_f1_macro_target,
            "target_test/f1_micro": avg_f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"])
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, cooldown=0, min_lr=1e-8),
            'monitor': 'val_loss'
        }
        return [optimizer], [lr_scheduler]


In [4]:
import wandb

wandb.login()
# Wandb setup and training loop
seeds = [42, 10, 100]  # List of seeds
project_name = 'mixed_edited'  # Replace with your wandb project name
domain = 'TEF'  # Replace with the specific domain for this notebook
type = 'union'  # Replace with the specific type for this notebook

# Initialize results dictionary
results = {
    "last_epoch": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "best_model": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "epoch_saved": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""

[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
reload(processed)
for seed in seeds:
    #wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "telephone_fiction",
            "source_domain": "telephone",
            "target_domain": "fiction",
            "domain_adapter_name": "mlm_union_F",
            "task_adapter_name": "TEFUni",
            "pretrained_model_name": "bert-base-uncased",
            "padding": "max_length",
            "max_seq_length": 128,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 6  # Save model at the 3rd epoch
        
        dm = processed.DataModuleSourceTargetMixed(hparams)
        dm.setup('fit')
        dm.setup("test")
        source_length, target_length = dm.get_dataset_lengths()
        print(f"Source dataset length: {source_length}")
        print(f"Target dataset length: {target_length}")
        model = JointDomainTaskAdapter(hparams,source_length,target_length)

        checkpoint_callback = ModelCheckpoint(
            filename="task-TEFUni-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
            filename="TEFUni-{epoch:02d}",
            every_n_epochs=save_epoch_3,
            save_top_k=-1,
        )

        #wandb_logger = WandbLogger()

    except Exception as e:
        print(f"Error during preprocessing : {e}")

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=10,
            accelerator="auto",
            precision=16,
            
            default_root_dir="checkpoints",
            #logger=wandb_logger,
            callbacks=[checkpoint_callback, save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
        )

        trainer.fit(model, train_loader, val_loader)
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")

        best_model = JointDomainTaskAdapter.load_from_checkpoint(best_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = JointDomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)

    except Exception as e:
        print(f"Error during testing: {e}")

    #wandb.finish()

Seed set to 42




Batch size: 32


Source genre: telephone
Target genre: fiction
Number of target samples: 69613


Source genre: telephone
Target genre: fiction
Number of target samples: 69613


Source dataset length: 75013
Target dataset length: 15922


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/amp.py:55: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


eee



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.5 M     Trainable params
109 M     Non-trainable params
119 M     Total params
476.013   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.234375
val/f1: 0.2821221947669983
val/taskclf_loss: 1.1097488403320312
val/loss: 1.1939928531646729
val/mlm_loss: 1.5908902883529663


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.770769476890564
val/f1: 0.7706559896469116
val/taskclf_loss: 0.5608477592468262
val/loss: 0.7368762493133545
val/mlm_loss: 1.5661959648132324


Validation: |                                                                                                 …

val/accuracy: 0.7882503271102905
val/f1: 0.7888385653495789
val/taskclf_loss: 0.5394446849822998
val/loss: 0.7161309719085693
val/mlm_loss: 1.5485495328903198


Validation: |                                                                                                 …

val/accuracy: 0.7897190451622009
val/f1: 0.7892654538154602
val/taskclf_loss: 0.5583484172821045
val/loss: 0.7308939695358276
val/mlm_loss: 1.54380464553833


Validation: |                                                                                                 …

val/accuracy: 0.7922014594078064
val/f1: 0.7920860648155212
val/taskclf_loss: 0.5994173884391785
val/loss: 0.7651164531707764
val/mlm_loss: 1.5457713603973389


Validation: |                                                                                                 …

val/accuracy: 0.7809147834777832
val/f1: 0.7799599170684814
val/taskclf_loss: 0.6657858490943909
val/loss: 0.8183399438858032
val/mlm_loss: 1.5370649099349976


Validation: |                                                                                                 …

val/accuracy: 0.797078549861908
val/f1: 0.7969635128974915
val/taskclf_loss: 0.6925080418586731
val/loss: 0.8350503444671631
val/mlm_loss: 1.506607174873352


Validation: |                                                                                                 …

val/accuracy: 0.7986510396003723
val/f1: 0.7985748648643494
val/taskclf_loss: 0.7234730124473572
val/loss: 0.8619572520256042
val/mlm_loss: 1.5143957138061523


Validation: |                                                                                                 …

val/accuracy: 0.7965996265411377
val/f1: 0.7963598966598511
val/taskclf_loss: 0.7569622993469238
val/loss: 0.8910716772079468
val/mlm_loss: 1.5228984355926514


Validation: |                                                                                                 …

val/accuracy: 0.7985312938690186
val/f1: 0.798602283000946
val/taskclf_loss: 0.736662745475769
val/loss: 0.8719868063926697
val/mlm_loss: 1.5095363855361938


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.7993693947792053
val/f1: 0.7994078993797302
val/taskclf_loss: 0.736764132976532
val/loss: 0.8710662722587585
val/mlm_loss: 1.5038015842437744


Best checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/task-TEFUni-epoch=01-val_loss=0.72.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/TEFUni-epoch=05.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Source genre: telephone
Target genre: fiction
Number of target samples: 69613


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.7598740458488464, 'source_test/accuracy': 0.8023713231086731, 'source_test/f1': 0.8009408712387085, 'source_test/f1_macro': 0.7937368154525757, 'source_test/f1_micro': 0.8023713231086731, 'target_test/loss': 0.8725507855415344, 'target_test/accuracy': 0.7670890688896179, 'target_test/f1': 0.7671659588813782, 'target_test/f1_macro': 0.7569380402565002, 'target_test/f1_micro': 0.7670890688896179}]
Best checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/task-TEFUni-epoch=01-val_loss=0.72.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/TEFUni-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.5397560596466064, 'source_test/accuracy': 0.7885464429855347, 'source_test/f1': 0.7892871499061584, 'source_test/f1_macro': 0.7786803245544434, 'source_test/f1_micro': 0.7885464429855347, 'target_test/loss': 0.5886291861534119, 'target_test/accuracy': 0.7602726221084595, 'target_test/f1': 0.7598181962966919, 'target_test/f1_macro': 0.7520350217819214, 'target_test/f1_micro': 0.7602726221084595}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 10


Test Results on saved epoch: [{'source_test/loss': 0.7274371385574341, 'source_test/accuracy': 0.7907785773277283, 'source_test/f1': 0.7891812920570374, 'source_test/f1_macro': 0.7819093465805054, 'source_test/f1_micro': 0.7907785773277283, 'target_test/loss': 0.8438490629196167, 'target_test/accuracy': 0.7600326538085938, 'target_test/f1': 0.7592214345932007, 'target_test/f1_macro': 0.7514703869819641, 'target_test/f1_micro': 0.7600326538085938}]
Batch size: 32


Source genre: telephone


Target genre: fiction
Number of target samples: 69613


Source genre: telephone
Target genre: fiction
Number of target samples: 69613
Source dataset length: 75013
Target dataset length: 15922


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.5 M     Trainable params
109 M     Non-trainable params
119 M     Total params
476.013   Total estimated model params size (MB)


eee


Sanity Checking: |                                                                                            …

val/accuracy: 0.265625
val/f1: 0.26492899656295776
val/taskclf_loss: 1.10595703125
val/loss: 1.2013025283813477
val/mlm_loss: 1.650502324104309


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7719508409500122
val/f1: 0.772009015083313
val/taskclf_loss: 0.5591091513633728
val/loss: 0.7327139377593994
val/mlm_loss: 1.550614356994629


Validation: |                                                                                                 …

val/accuracy: 0.7871407866477966
val/f1: 0.7874405384063721
val/taskclf_loss: 0.5360803604125977
val/loss: 0.7109161019325256
val/mlm_loss: 1.534616470336914


Validation: |                                                                                                 …

val/accuracy: 0.7889527678489685
val/f1: 0.7890942692756653
val/taskclf_loss: 0.5744337439537048
val/loss: 0.7452303171157837
val/mlm_loss: 1.5499008893966675


Validation: |                                                                                                 …

val/accuracy: 0.7943246960639954
val/f1: 0.7946635484695435
val/taskclf_loss: 0.5849035382270813
val/loss: 0.7522625923156738
val/mlm_loss: 1.5407379865646362


Validation: |                                                                                                 …

val/accuracy: 0.788609504699707
val/f1: 0.7890686988830566
val/taskclf_loss: 0.631690263748169
val/loss: 0.7886381149291992
val/mlm_loss: 1.5280630588531494


Validation: |                                                                                                 …

val/accuracy: 0.795074999332428
val/f1: 0.7953231334686279
val/taskclf_loss: 0.6902458667755127
val/loss: 0.8361963629722595
val/mlm_loss: 1.5238101482391357


Validation: |                                                                                                 …

val/accuracy: 0.7947158217430115
val/f1: 0.7947986125946045
val/taskclf_loss: 0.7402161955833435
val/loss: 0.8765131235122681
val/mlm_loss: 1.5186463594436646


Validation: |                                                                                                 …

val/accuracy: 0.7941171526908875
val/f1: 0.7942629456520081
val/taskclf_loss: 0.7542906999588013
val/loss: 0.8877645134925842
val/mlm_loss: 1.5165972709655762


Validation: |                                                                                                 …

val/accuracy: 0.7996407747268677
val/f1: 0.7999874353408813
val/taskclf_loss: 0.7371837496757507
val/loss: 0.8751565217971802
val/mlm_loss: 1.525184988975525


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.7997605204582214
val/f1: 0.8000461459159851
val/taskclf_loss: 0.7409834265708923
val/loss: 0.8761564493179321
val/mlm_loss: 1.5129941701889038


Best checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/task-TEFUni-epoch=01-val_loss=0.71.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/TEFUni-epoch=05.ckpt


Source genre: telephone
Target genre: fiction
Number of target samples: 69613


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.7347378730773926, 'source_test/accuracy': 0.8036434054374695, 'source_test/f1': 0.802412211894989, 'source_test/f1_macro': 0.794808030128479, 'source_test/f1_micro': 0.8036434054374695, 'target_test/loss': 0.8554263710975647, 'target_test/accuracy': 0.7786818146705627, 'target_test/f1': 0.7786355018615723, 'target_test/f1_macro': 0.7686328887939453, 'target_test/f1_micro': 0.7786818146705627}]
Best checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/task-TEFUni-epoch=01-val_loss=0.71.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/TEFUni-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.5352539420127869, 'source_test/accuracy': 0.7832180857658386, 'source_test/f1': 0.7823570966720581, 'source_test/f1_macro': 0.7739241123199463, 'source_test/f1_micro': 0.7832180857658386, 'target_test/loss': 0.5987016558647156, 'target_test/accuracy': 0.7602726221084595, 'target_test/f1': 0.7597094178199768, 'target_test/f1_macro': 0.7519553303718567, 'target_test/f1_micro': 0.7602726221084595}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 100


Test Results on saved epoch: [{'source_test/loss': 0.6881586909294128, 'source_test/accuracy': 0.8026353716850281, 'source_test/f1': 0.8014659285545349, 'source_test/f1_macro': 0.7941883206367493, 'source_test/f1_micro': 0.8026353716850281, 'target_test/loss': 0.8096601963043213, 'target_test/accuracy': 0.7691051959991455, 'target_test/f1': 0.7683475613594055, 'target_test/f1_macro': 0.7599045038223267, 'target_test/f1_micro': 0.7691051959991455}]
Batch size: 32


Source genre: telephone


Target genre: fiction
Number of target samples: 69613


Source genre: telephone
Target genre: fiction
Number of target samples: 69613


Source dataset length: 75013
Target dataset length: 15922


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.5 M     Trainable params
109 M     Non-trainable params
119 M     Total params
476.013   Total estimated model params size (MB)


eee


Sanity Checking: |                                                                                            …

val/accuracy: 0.390625
val/f1: 0.5528455376625061
val/taskclf_loss: 1.0911331176757812
val/loss: 1.1996588706970215
val/mlm_loss: 1.710953712463379


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7673850655555725
val/f1: 0.7667611241340637
val/taskclf_loss: 0.5684815645217896
val/loss: 0.7400967478752136
val/mlm_loss: 1.5486239194869995


Validation: |                                                                                                 …

val/accuracy: 0.7884578704833984
val/f1: 0.7884612679481506
val/taskclf_loss: 0.5425862073898315
val/loss: 0.7189757823944092
val/mlm_loss: 1.5499963760375977


Validation: |                                                                                                 …

val/accuracy: 0.7924409508705139
val/f1: 0.7926195859909058
val/taskclf_loss: 0.5688442587852478
val/loss: 0.7391226887702942
val/mlm_loss: 1.5413520336151123


Validation: |                                                                                                 …

val/accuracy: 0.7929198741912842
val/f1: 0.7927234172821045
val/taskclf_loss: 0.6038262248039246
val/loss: 0.7677795886993408
val/mlm_loss: 1.5402095317840576


Validation: |                                                                                                 …

val/accuracy: 0.7919620275497437
val/f1: 0.7920749187469482
val/taskclf_loss: 0.6187247037887573
val/loss: 0.7795928716659546
val/mlm_loss: 1.5374877452850342


Validation: |                                                                                                 …

val/accuracy: 0.8011813759803772
val/f1: 0.8007678389549255
val/taskclf_loss: 0.6874006986618042
val/loss: 0.8342458009719849
val/mlm_loss: 1.5260741710662842


Validation: |                                                                                                 …

val/accuracy: 0.7969907522201538
val/f1: 0.7965422868728638
val/taskclf_loss: 0.727010190486908
val/loss: 0.8660925626754761
val/mlm_loss: 1.5213487148284912


Validation: |                                                                                                 …

val/accuracy: 0.7985472679138184
val/f1: 0.7983219623565674
val/taskclf_loss: 0.7519932389259338
val/loss: 0.884351909160614
val/mlm_loss: 1.507930874824524


Validation: |                                                                                                 …

val/accuracy: 0.8009418845176697
val/f1: 0.8009002208709717
val/taskclf_loss: 0.7321994304656982
val/loss: 0.8691697716712952
val/mlm_loss: 1.5144755840301514


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.8005827069282532
val/f1: 0.8006137013435364
val/taskclf_loss: 0.7328620553016663
val/loss: 0.8677943348884583
val/mlm_loss: 1.5034980773925781


Best checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/task-TEFUni-epoch=01-val_loss=0.72.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/TEFUni-epoch=05.ckpt


Source genre: telephone
Target genre: fiction
Number of target samples: 69613


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.7423366904258728, 'source_test/accuracy': 0.8013872504234314, 'source_test/f1': 0.8002027273178101, 'source_test/f1_macro': 0.7917925715446472, 'source_test/f1_micro': 0.8013872504234314, 'target_test/loss': 0.8327508568763733, 'target_test/accuracy': 0.7796899080276489, 'target_test/f1': 0.7793402671813965, 'target_test/f1_macro': 0.7712745070457458, 'target_test/f1_micro': 0.7796899080276489}]
Best checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/task-TEFUni-epoch=01-val_loss=0.72.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/TEFUni-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.541084885597229, 'source_test/accuracy': 0.7885224223136902, 'source_test/f1': 0.7875929474830627, 'source_test/f1_macro': 0.7782339453697205, 'source_test/f1_micro': 0.7885224223136902, 'target_test/loss': 0.596006453037262, 'target_test/accuracy': 0.754200279712677, 'target_test/f1': 0.753216028213501, 'target_test/f1_macro': 0.7471886873245239, 'target_test/f1_micro': 0.754200279712677}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on saved epoch: [{'source_test/loss': 0.6892821788787842, 'source_test/accuracy': 0.8018913269042969, 'source_test/f1': 0.8011375069618225, 'source_test/f1_macro': 0.7921610474586487, 'source_test/f1_micro': 0.8018913269042969, 'target_test/loss': 0.7867249250411987, 'target_test/accuracy': 0.7769057154655457, 'target_test/f1': 0.7761200666427612, 'target_test/f1_macro': 0.768801748752594, 'target_test/f1_micro': 0.7769057154655457}]


In [6]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.7598740458488464, 0.7347378730773926, 0.7423366904258728], 'source_test/accuracy': [0.8023713231086731, 0.8036434054374695, 0.8013872504234314], 'source_test/f1': [0.8009408712387085, 0.802412211894989, 0.8002027273178101], 'source_test/f1_macro': [0.7937368154525757, 0.794808030128479, 0.7917925715446472], 'source_test/f1_micro': [0.8023713231086731, 0.8036434054374695, 0.8013872504234314], 'target_test/loss': [0.8725507855415344, 0.8554263710975647, 0.8327508568763733], 'target_test/accuracy': [0.7670890688896179, 0.7786818146705627, 0.7796899080276489], 'target_test/f1': [0.7671659588813782, 0.7786355018615723, 0.7793402671813965], 'target_test/f1_macro': [0.7569380402565002, 0.7686328887939453, 0.7712745070457458], 'target_test/f1_micro': [0.7670890688896179, 0.7786818146705627, 0.7796899080276489]}), ('best_model', {'source_test/loss': [0.5397560596466064, 0.5352539420127869, 0.541084885597229], 'source_test/accuracy': [0.78854644

In [7]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
# wandb.init(project=project_name, name=f'{domain}_mean_results')
# for scenario in mean_results:
#     for key, value in mean_results[scenario].items():
#         wandb.log({f"{scenario}/{key}": value})
#         wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
# wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

Mean Results: {'last_epoch': {'source_test/loss': 0.745649536450704, 'source_test/accuracy': 0.8024673263231913, 'source_test/f1': 0.8011852701505026, 'source_test/f1_macro': 0.7934458057085673, 'source_test/f1_micro': 0.8024673263231913, 'target_test/loss': 0.8535760045051575, 'target_test/accuracy': 0.7751535971959432, 'target_test/f1': 0.775047242641449, 'target_test/f1_macro': 0.7656151453653971, 'target_test/f1_micro': 0.7751535971959432}, 'best_model': {'source_test/loss': 0.5386982957522074, 'source_test/accuracy': 0.7867623170216879, 'source_test/f1': 0.7864123980204264, 'source_test/f1_macro': 0.7769461274147034, 'source_test/f1_micro': 0.7867623170216879, 'target_test/loss': 0.5944457650184631, 'target_test/accuracy': 0.758248507976532, 'target_test/f1': 0.7575812141100565, 'target_test/f1_macro': 0.750393013159434, 'target_test/f1_micro': 0.758248507976532}, 'epoch_saved': {'source_test/loss': 0.7016260027885437, 'source_test/accuracy': 0.7984350919723511, 'source_test/f1': 

In [8]:
print('dones')

dones


In [9]:
best_val_loss

inf