In [1]:

import torch
import gc
torch.cuda.empty_cache()
gc.collect()


0

In [2]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Initialize the console

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
import torchmetrics

install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

dataset = load_from_disk(f"../{config.Config.DATASETS_SAVE_PATH}/datasets")


['/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages', '/tmp/tmp3azv1ji1', '/home/guest/Desktop/projects/third-experiments/domain_adaptation_project/modules']


2024-09-23 13:00:34.402706: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-23 13:00:34.537847: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




./text-files/
./hp-model-


In [3]:
import torch
import os
import pytorch_lightning as pl
from transformers import  AutoConfig, DataCollatorForLanguageModeling
from collections import defaultdict
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import torchmetrics

class JointDomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams,source_dataset_length,target_dataset_length):
        super(JointDomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)

        # Load config with hidden states output
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)

        # Set reduction factor and leave_out layers
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        self.leave_out = self.hparams.get("leave_out", [])
        # if self.leave_out != "None":
        #     self.leave_out = self.leave_out.split(",")
        #     self.leave_out = [int(i) for i in self.leave_out]
        # else:
        #     self.leave_out = []

        # Load MLM adapter with head
        self.model.load_adapter(f"{self.hparams['saved_adapter_dir']}/{self.hparams['domain_adapter_name']}", with_head=True)

        # Add classification head for the task
        self.model.add_classification_head(f"{self.hparams['task_adapter_name']}", num_labels=self.hparams["num_classes"])

        # Set active adapters
        self.model.train_adapter(self.hparams['domain_adapter_name'])
        # Calculate alpha based on dataset lengths
        self.alpha = source_dataset_length / (source_dataset_length + target_dataset_length)

        # Initialize loss functions and metrics
        self.criterion = nn.CrossEntropyLoss()
        self.mlm_criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="weighted")
        self.f1_macro = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="macro")
        self.f1_micro = torchmetrics.F1Score(task='multiclass',num_classes=self.hparams["num_classes"], average="micro")

        self.softmax = nn.Softmax(dim=1)
        self.validation_outputs = []
        self.test_outputs = []
        # Optimizer related variables
        self.learning_rate = self.hparams.get("learning_rate", 1e-4)
        self.scheduler_factor = self.hparams.get("scheduler_factor", 0.1)
        self.scheduler_patience = self.hparams.get("scheduler_patience", 0.05)
        self.scheduler_threshold = self.hparams.get("scheduler_threshold", 0.0001)
        self.scheduler_cooldown = self.hparams.get("scheduler_cooldown", 0)
        self.scheduler_eps = self.hparams.get("scheduler_eps", 1e-8)

    def forward(self, input_ids, attention_mask=None, labels=None, task=None):
        if task == "mlm":
            self.model.active_head= self.hparams['domain_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        elif task == "classification":
            self.model.active_head= self.hparams['task_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        else:
            raise ValueError("Task must be either 'mlm' or 'classification'.")
        return outputs

    def training_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha

        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))

        metrics = {
            "train/accuracy": accuracy,
            "train/f1": f1,
            "train/taskclf_loss": task_loss,
            "train/loss": loss,
            "train/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return loss

    def validation_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha
        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        self.validation_outputs.append({
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
                })
        
        metrics = {
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return metrics
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        outputs= self.validation_outputs

        avg_loss = torch.stack([x["val/loss"] for x in outputs]).mean()
        avg_task_loss = torch.stack([x["val/taskclf_loss"] for x in outputs]).mean()
        avg_mlm_loss = torch.stack([x["val/mlm_loss"] for x in outputs]).mean()
        avg_accuracy = torch.stack([x["val/accuracy"] for x in outputs]).mean()
        avg_f1 = torch.stack([x["val/f1"] for x in outputs]).mean()
        print(f"val/accuracy: {avg_accuracy}")
        print(f"val/f1: {avg_f1}")
        print(f"val/taskclf_loss: {avg_task_loss}")
        print(f"val/loss: {avg_loss}")
        print(f"val/mlm_loss: {avg_mlm_loss}")
        metrics = {
            "val/avg_loss": avg_loss,
            "val/avg_taskclf_loss": avg_task_loss,
            "val/avg_mlm_loss": avg_mlm_loss,
            "val/avg_accuracy": avg_accuracy,
            "val/avg_f1": avg_f1,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
        self.log("val_loss", avg_loss)

    def test_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        target_labels = batch["label_target"]

        # Classification task for source data
        cls_outputs_source = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits_source = cls_outputs_source.logits
        task_loss_source = self.criterion(cls_logits_source, source_labels)

        # Classification task for target data
        cls_outputs_target = self(input_ids=target_input_ids, attention_mask=target_attention_mask, task="classification")
        cls_logits_target = cls_outputs_target.logits
        task_loss_target = self.criterion(cls_logits_target, target_labels)

        # Combine losses (though typically you would evaluate them separately)
        loss = task_loss_source + task_loss_target

        accuracy_source = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_source = self.f1(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_macro_source = self.f1_macro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_micro_source = self.f1_micro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
    
        accuracy_target = self.accuracy(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_target = self.f1(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_macro_target = self.f1_macro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_micro_target = self.f1_micro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))

        metrics = {
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        self.test_outputs.append({
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        })
        return metrics
    def on_test_epoch_start(self):
        self.test_outputs = []

    def on_test_epoch_end(self):
        outputs=  self.test_outputs

        avg_loss_source = torch.stack([x["source_test/loss"] for x in outputs]).mean()
        avg_task_loss_target = torch.stack([x["target_test/loss"] for x in outputs]).mean()
        avg_accuracy_source = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
        avg_f1_source = torch.stack([x["source_test/f1"] for x in outputs]).mean()
        avg_f1_macro_source = torch.stack([x["source_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_source = torch.stack([x["source_test/f1_micro"] for x in outputs]).mean()

        avg_accuracy_target = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
        avg_f1_target = torch.stack([x["target_test/f1"] for x in outputs]).mean()
        avg_f1_macro_target = torch.stack([x["target_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_target = torch.stack([x["target_test/f1_micro"] for x in outputs]).mean()

        metrics = {
            "source_test/loss": avg_loss_source,
            "target_test/loss": avg_task_loss_target,
            "source_test/accuracy": avg_accuracy_source,
            "source_test/f1": avg_f1_source,
            "source_test/f1_macro": avg_f1_macro_source,
            "source_test/f1_micro": avg_f1_micro_source,
            "target_test/accuracy": avg_accuracy_target,
            "target_test/f1": avg_f1_target,
            "target_test/f1_macro": avg_f1_macro_target,
            "target_test/f1_micro": avg_f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"])
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, cooldown=0, min_lr=1e-8),
            'monitor': 'val_loss'
        }
        return [optimizer], [lr_scheduler]


In [4]:
import wandb

wandb.login()
# Wandb setup and training loop
seeds = [42, 10, 100]  # List of seeds
project_name = 'mixed_edited'  # Replace with your wandb project name
domain = 'SF'  # Replace with the specific domain for this notebook
type = 'union'  # Replace with the specific type for this notebook

# Initialize results dictionary
results = {
    "last_epoch": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "best_model": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "epoch_saved": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""

[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
reload(processed)
for seed in seeds:
    #wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "slate_fiction",
            "source_domain": "slate",
            "target_domain": "fiction",
            "domain_adapter_name": "mlm_union_F",
            "task_adapter_name": "SFUni",
            "pretrained_model_name": "bert-base-uncased",
            "padding": "max_length",
            "max_seq_length": 128,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 6  # Save model at the 3rd epoch
        
        dm = processed.DataModuleSourceTargetMixed(hparams)
        dm.setup('fit')
        dm.setup("test")
        source_length, target_length = dm.get_dataset_lengths()
        print(f"Source dataset length: {source_length}")
        print(f"Target dataset length: {target_length}")
        model = JointDomainTaskAdapter(hparams,source_length,target_length)

        checkpoint_callback = ModelCheckpoint(
            filename="task-SFUni-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
            filename="SFUni-{epoch:02d}",
            every_n_epochs=save_epoch_3,
            save_top_k=-1,
        )

        #wandb_logger = WandbLogger()

    except Exception as e:
        print(f"Error during preprocessing : {e}")

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=10,
            accelerator="auto",
            precision=16,
            
            default_root_dir="checkpoints",
            #logger=wandb_logger,
            callbacks=[checkpoint_callback, save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
        )

        trainer.fit(model, train_loader, val_loader)
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")

        best_model = JointDomainTaskAdapter.load_from_checkpoint(best_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = JointDomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)

    except Exception as e:
        print(f"Error during testing: {e}")

    #wandb.finish()

Seed set to 42




Batch size: 32


Source genre: slate
Target genre: fiction
Number of target samples: 69613


Source genre: slate
Target genre: fiction
Number of target samples: 69613
Source dataset length: 69575
Target dataset length: 15922


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/amp.py:55: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


eee



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.5 M     Trainable params
109 M     Non-trainable params
119 M     Total params
476.013   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.265625
val/f1: 0.29204756021499634
val/taskclf_loss: 1.1125106811523438
val/loss: 1.2015986442565918
val/mlm_loss: 1.5908902883529663


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7310923337936401
val/f1: 0.7309767603874207
val/taskclf_loss: 0.6329785585403442
val/loss: 0.8052945733070374
val/mlm_loss: 1.5582709312438965


Validation: |                                                                                                 …

val/accuracy: 0.7543361186981201
val/f1: 0.7551204562187195
val/taskclf_loss: 0.6016958951950073
val/loss: 0.7774654030799866
val/mlm_loss: 1.545532464981079


Validation: |                                                                                                 …

val/accuracy: 0.7521815896034241
val/f1: 0.7518983483314514
val/taskclf_loss: 0.6226133704185486
val/loss: 0.7942436337471008
val/mlm_loss: 1.544223427772522


Validation: |                                                                                                 …

val/accuracy: 0.7531739473342896
val/f1: 0.7533004283905029
val/taskclf_loss: 0.6454376578330994
val/loss: 0.8137199282646179
val/mlm_loss: 1.5490696430206299


Validation: |                                                                                                 …

val/accuracy: 0.7485251426696777
val/f1: 0.7479541301727295
val/taskclf_loss: 0.7404903173446655
val/loss: 0.888882040977478
val/mlm_loss: 1.5373153686523438


Validation: |                                                                                                 …

val/accuracy: 0.7639802098274231
val/f1: 0.7636767625808716
val/taskclf_loss: 0.8267722129821777
val/loss: 0.9541022181510925
val/mlm_loss: 1.510501503944397


Validation: |                                                                                                 …

val/accuracy: 0.7605344653129578
val/f1: 0.760444164276123
val/taskclf_loss: 0.8564119935035706
val/loss: 0.9779731035232544
val/mlm_loss: 1.5091632604599


Validation: |                                                                                                 …

val/accuracy: 0.762688934803009
val/f1: 0.7626813054084778
val/taskclf_loss: 0.8880738019943237
val/loss: 1.0062698125839233
val/mlm_loss: 1.5227562189102173


Validation: |                                                                                                 …

val/accuracy: 0.7629472017288208
val/f1: 0.7624722123146057
val/taskclf_loss: 0.8715952634811401
val/loss: 0.991492748260498
val/mlm_loss: 1.5154136419296265


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.7630763053894043
val/f1: 0.7626153230667114
val/taskclf_loss: 0.8666260242462158
val/loss: 0.985469400882721
val/mlm_loss: 1.5047842264175415


Best checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/task-SFUni-epoch=01-val_loss=0.78.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/SFUni-epoch=05.ckpt


Source genre: slate


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Target genre: fiction
Number of target samples: 69613


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.9137237071990967, 'source_test/accuracy': 0.7501439452171326, 'source_test/f1': 0.7484152913093567, 'source_test/f1_macro': 0.7419396042823792, 'source_test/f1_micro': 0.7501439452171326, 'target_test/loss': 0.7731056809425354, 'target_test/accuracy': 0.7799059152603149, 'target_test/f1': 0.7796570658683777, 'target_test/f1_macro': 0.7703190445899963, 'target_test/f1_micro': 0.7799059152603149}]
Best checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/task-SFUni-epoch=01-val_loss=0.78.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/SFUni-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.6424590945243835, 'source_test/accuracy': 0.7358151078224182, 'source_test/f1': 0.7368153929710388, 'source_test/f1_macro': 0.7234376072883606, 'source_test/f1_micro': 0.7358151078224182, 'target_test/loss': 0.5670212507247925, 'target_test/accuracy': 0.7686011791229248, 'target_test/f1': 0.769274890422821, 'target_test/f1_macro': 0.7588562369346619, 'target_test/f1_micro': 0.7686011791229248}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 10


Test Results on saved epoch: [{'source_test/loss': 0.8880622386932373, 'source_test/accuracy': 0.748127818107605, 'source_test/f1': 0.7465293407440186, 'source_test/f1_macro': 0.7396610379219055, 'source_test/f1_micro': 0.748127818107605, 'target_test/loss': 0.7483647465705872, 'target_test/accuracy': 0.7799059152603149, 'target_test/f1': 0.7806758284568787, 'target_test/f1_macro': 0.7687079310417175, 'target_test/f1_micro': 0.7799059152603149}]
Batch size: 32


Source genre: slate
Target genre: fiction
Number of target samples: 69613


Source genre: slate
Target genre: fiction
Number of target samples: 69613
Source dataset length: 69575
Target dataset length: 15922


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.5 M     Trainable params
109 M     Non-trainable params
119 M     Total params
476.013   Total estimated model params size (MB)


eee


Sanity Checking: |                                                                                            …

val/accuracy: 0.359375
val/f1: 0.39193296432495117
val/taskclf_loss: 1.1017913818359375
val/loss: 1.203977108001709
val/mlm_loss: 1.650502324104309


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7320845723152161
val/f1: 0.7321654558181763
val/taskclf_loss: 0.6324458718299866
val/loss: 0.8040799498558044
val/mlm_loss: 1.5540763139724731


Validation: |                                                                                                 …

val/accuracy: 0.7513660788536072
val/f1: 0.7519127130508423
val/taskclf_loss: 0.6025936603546143
val/loss: 0.7764294743537903
val/mlm_loss: 1.5360467433929443


Validation: |                                                                                                 …

val/accuracy: 0.7545535564422607
val/f1: 0.7556578516960144
val/taskclf_loss: 0.6096982359886169
val/loss: 0.7844622731208801
val/mlm_loss: 1.548135757446289


Validation: |                                                                                                 …

val/accuracy: 0.7568779587745667
val/f1: 0.7571426033973694
val/taskclf_loss: 0.6555202603340149
val/loss: 0.8202483057975769
val/mlm_loss: 1.5400668382644653


Validation: |                                                                                                 …

val/accuracy: 0.756918728351593
val/f1: 0.7573023438453674
val/taskclf_loss: 0.7176216840744019
val/loss: 0.868807315826416
val/mlm_loss: 1.5294488668441772


Validation: |                                                                                                 …

val/accuracy: 0.766304612159729
val/f1: 0.7664219737052917
val/taskclf_loss: 0.8129998445510864
val/loss: 0.9445881247520447
val/mlm_loss: 1.5195945501327515


Validation: |                                                                                                 …

val/accuracy: 0.7661755084991455
val/f1: 0.7663761377334595
val/taskclf_loss: 0.8504650592803955
val/loss: 0.975145161151886
val/mlm_loss: 1.5199649333953857


Validation: |                                                                                                 …

val/accuracy: 0.765658974647522
val/f1: 0.7658944725990295
val/taskclf_loss: 0.8757387399673462
val/loss: 0.9938202500343323
val/mlm_loss: 1.509805679321289


Validation: |                                                                                                 …

val/accuracy: 0.7667328119277954
val/f1: 0.7664268612861633
val/taskclf_loss: 0.859468936920166
val/loss: 0.9834461212158203
val/mlm_loss: 1.5251942873001099


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.7660871744155884
val/f1: 0.765885591506958
val/taskclf_loss: 0.8626196384429932
val/loss: 0.9839819073677063
val/mlm_loss: 1.5143029689788818


Best checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/task-SFUni-epoch=01-val_loss=0.78.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/SFUni-epoch=05.ckpt


Source genre: slate
Target genre: fiction
Number of target samples: 69613


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.9453220367431641, 'source_test/accuracy': 0.7461116909980774, 'source_test/f1': 0.7443251013755798, 'source_test/f1_macro': 0.7347594499588013, 'source_test/f1_micro': 0.7461116909980774, 'target_test/loss': 0.7755804657936096, 'target_test/accuracy': 0.7804339528083801, 'target_test/f1': 0.7802576422691345, 'target_test/f1_macro': 0.7716965079307556, 'target_test/f1_micro': 0.7804339528083801}]
Best checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/task-SFUni-epoch=01-val_loss=0.78.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/SFUni-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.6571228504180908, 'source_test/accuracy': 0.7355750799179077, 'source_test/f1': 0.7351648211479187, 'source_test/f1_macro': 0.7245599031448364, 'source_test/f1_micro': 0.7355750799179077, 'target_test/loss': 0.5541521906852722, 'target_test/accuracy': 0.7743855714797974, 'target_test/f1': 0.7747167944908142, 'target_test/f1_macro': 0.7632171511650085, 'target_test/f1_micro': 0.7743855714797974}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 100


Test Results on saved epoch: [{'source_test/loss': 0.8912049531936646, 'source_test/accuracy': 0.7423434853553772, 'source_test/f1': 0.7415289282798767, 'source_test/f1_macro': 0.7302813529968262, 'source_test/f1_micro': 0.7423434853553772, 'target_test/loss': 0.7412570714950562, 'target_test/accuracy': 0.7766417264938354, 'target_test/f1': 0.777521550655365, 'target_test/f1_macro': 0.7655775547027588, 'target_test/f1_micro': 0.7766417264938354}]
Batch size: 32


Source genre: slate


Target genre: fiction
Number of target samples: 69613


Source genre: slate
Target genre: fiction
Number of target samples: 69613
Source dataset length: 69575
Target dataset length: 15922


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 119 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.5 M     Trainable params
109 M     Non-trainable params
119 M     Total params
476.013   Total estimated model params size (MB)


eee


Sanity Checking: |                                                                                            …

val/accuracy: 0.46875
val/f1: 0.6376811265945435
val/taskclf_loss: 1.072509765625
val/loss: 1.1914063692092896
val/mlm_loss: 1.710953712463379


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7360876798629761
val/f1: 0.7357834577560425
val/taskclf_loss: 0.6262332201004028
val/loss: 0.7977961301803589
val/mlm_loss: 1.5474815368652344


Validation: |                                                                                                 …

val/accuracy: 0.7507203817367554
val/f1: 0.751419723033905
val/taskclf_loss: 0.5982270836830139
val/loss: 0.7748212218284607
val/mlm_loss: 1.5464917421340942


Validation: |                                                                                                 …

val/accuracy: 0.7564022541046143
val/f1: 0.7571414709091187
val/taskclf_loss: 0.6041162014007568
val/loss: 0.7764821648597717
val/mlm_loss: 1.5296770334243774


Validation: |                                                                                                 …

val/accuracy: 0.7612276673316956
val/f1: 0.7617570757865906
val/taskclf_loss: 0.6255995631217957
val/loss: 0.7952306270599365
val/mlm_loss: 1.5364739894866943


Validation: |                                                                                                 …

val/accuracy: 0.7549817562103271
val/f1: 0.7552047967910767
val/taskclf_loss: 0.7012771368026733
val/loss: 0.856421709060669
val/mlm_loss: 1.5343626737594604


Validation: |                                                                                                 …

val/accuracy: 0.7620840668678284
val/f1: 0.7625953555107117
val/taskclf_loss: 0.791671633720398
val/loss: 0.9280391335487366
val/mlm_loss: 1.523929476737976


Validation: |                                                                                                 …

val/accuracy: 0.7630763053894043
val/f1: 0.7628111839294434
val/taskclf_loss: 0.8529582023620605
val/loss: 0.9775769710540771
val/mlm_loss: 1.522128701210022


Validation: |                                                                                                 …

val/accuracy: 0.7614383697509766
val/f1: 0.761969268321991
val/taskclf_loss: 0.8757399320602417
val/loss: 0.993333101272583
val/mlm_loss: 1.5071845054626465


Validation: |                                                                                                 …

val/accuracy: 0.7651832103729248
val/f1: 0.7647231817245483
val/taskclf_loss: 0.8596656322479248
val/loss: 0.9814408421516418
val/mlm_loss: 1.5135666131973267


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.764279305934906
val/f1: 0.7639526128768921
val/taskclf_loss: 0.8629684448242188
val/loss: 0.9820606708526611
val/mlm_loss: 1.50246262550354


Best checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/task-SFUni-epoch=01-val_loss=0.77.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/SFUni-epoch=05.ckpt


Source genre: slate
Target genre: fiction
Number of target samples: 69613


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.9002724289894104, 'source_test/accuracy': 0.7501199841499329, 'source_test/f1': 0.7500344514846802, 'source_test/f1_macro': 0.7386426329612732, 'source_test/f1_micro': 0.7501199841499329, 'target_test/loss': 0.803888738155365, 'target_test/accuracy': 0.7864823341369629, 'target_test/f1': 0.7858926653862, 'target_test/f1_macro': 0.7777913808822632, 'target_test/f1_micro': 0.7864823341369629}]
Best checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/task-SFUni-epoch=01-val_loss=0.77.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/SFUni-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.6602180600166321, 'source_test/accuracy': 0.7239342927932739, 'source_test/f1': 0.7242387533187866, 'source_test/f1_macro': 0.7116022109985352, 'source_test/f1_micro': 0.7239342927932739, 'target_test/loss': 0.5678831338882446, 'target_test/accuracy': 0.7726094126701355, 'target_test/f1': 0.7729753851890564, 'target_test/f1_macro': 0.761871337890625, 'target_test/f1_micro': 0.7726094126701355}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on saved epoch: [{'source_test/loss': 0.837738037109375, 'source_test/accuracy': 0.7519201040267944, 'source_test/f1': 0.751878023147583, 'source_test/f1_macro': 0.7398357391357422, 'source_test/f1_micro': 0.7519201040267944, 'target_test/loss': 0.7335904240608215, 'target_test/accuracy': 0.7839621901512146, 'target_test/f1': 0.7835701107978821, 'target_test/f1_macro': 0.7736867666244507, 'target_test/f1_micro': 0.7839621901512146}]


In [6]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.9137237071990967, 0.9453220367431641, 0.9002724289894104], 'source_test/accuracy': [0.7501439452171326, 0.7461116909980774, 0.7501199841499329], 'source_test/f1': [0.7484152913093567, 0.7443251013755798, 0.7500344514846802], 'source_test/f1_macro': [0.7419396042823792, 0.7347594499588013, 0.7386426329612732], 'source_test/f1_micro': [0.7501439452171326, 0.7461116909980774, 0.7501199841499329], 'target_test/loss': [0.7731056809425354, 0.7755804657936096, 0.803888738155365], 'target_test/accuracy': [0.7799059152603149, 0.7804339528083801, 0.7864823341369629], 'target_test/f1': [0.7796570658683777, 0.7802576422691345, 0.7858926653862], 'target_test/f1_macro': [0.7703190445899963, 0.7716965079307556, 0.7777913808822632], 'target_test/f1_micro': [0.7799059152603149, 0.7804339528083801, 0.7864823341369629]}), ('best_model', {'source_test/loss': [0.6424590945243835, 0.6571228504180908, 0.6602180600166321], 'source_test/accuracy': [0.735815107

In [7]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
# wandb.init(project=project_name, name=f'{domain}_mean_results')
# for scenario in mean_results:
#     for key, value in mean_results[scenario].items():
#         wandb.log({f"{scenario}/{key}": value})
#         wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
# wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

Mean Results: {'last_epoch': {'source_test/loss': 0.919772724310557, 'source_test/accuracy': 0.7487918734550476, 'source_test/f1': 0.7475916147232056, 'source_test/f1_macro': 0.7384472290674845, 'source_test/f1_micro': 0.7487918734550476, 'target_test/loss': 0.78419162829717, 'target_test/accuracy': 0.782274067401886, 'target_test/f1': 0.7819357911745707, 'target_test/f1_macro': 0.773268977801005, 'target_test/f1_micro': 0.782274067401886}, 'best_model': {'source_test/loss': 0.6532666683197021, 'source_test/accuracy': 0.7317748268445333, 'source_test/f1': 0.7320729891459147, 'source_test/f1_macro': 0.7198665738105774, 'source_test/f1_micro': 0.7317748268445333, 'target_test/loss': 0.5630188584327698, 'target_test/accuracy': 0.7718653877576193, 'target_test/f1': 0.7723223567008972, 'target_test/f1_macro': 0.7613149086634318, 'target_test/f1_micro': 0.7718653877576193}, 'epoch_saved': {'source_test/loss': 0.8723350763320923, 'source_test/accuracy': 0.7474638024965922, 'source_test/f1': 0

In [8]:
print('dones')

dones


In [9]:
best_val_loss

inf