In [1]:
import sys
from pathlib import Path

# Function to set up the source path for modules
def setup_src_path():
    module_path = str(Path.cwd().parents[1] / "modules")
    if module_path not in sys.path:
        sys.path.append(module_path)
    return sys.path

# Call the function to ensure the path is set
setup_src_path()

# Print the current Python path for debugging
print("Current Python path:", sys.path)

import os

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"

print(f"TOKENIZERS_PARALLELISM: {os.environ.get('TOKENIZERS_PARALLELISM')}")
print(f"OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS')}")

print("Starting imports...")
from typing import Optional, Dict, Any
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig, get_cosine_schedule_with_warmup
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import torchmetrics
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np
print("Imports completed successfully.")

install(show_locals=True)

import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

dataset_path = f"../{config.Config.DATASETS_SAVE_PATH}/datasets"
print(f"Loading dataset from: {dataset_path}")

if os.path.exists(dataset_path):
    dataset = load_from_disk(dataset_path)
    print("Dataset loaded successfully")
else:
    print("Dataset path does not exist")


Current Python path: ['/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages', '/home/guest/Desktop/projects/third-experiments/domain_adaptation_project/modules']
TOKENIZERS_PARALLELISM: false
OMP_NUM_THREADS: 1
Starting imports...


Imports completed successfully.


2024-08-30 00:42:30.723577: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-30 00:42:30.752882: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




./text-files/
./hp-model-
Loading dataset from: ../../../datasets/datasets
Dataset loaded successfully


In [2]:
import torch
import os
import pytorch_lightning as pl
from transformers import  AutoConfig, DataCollatorForLanguageModeling
from collections import defaultdict
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import torchmetrics

class JointDomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams,source_dataset_length,target_dataset_length):
        super(JointDomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)

        # Load config with hidden states output
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)

        # Set reduction factor and leave_out layers
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        self.leave_out = self.hparams.get("leave_out", [])
        # if self.leave_out != "None":
        #     self.leave_out = self.leave_out.split(",")
        #     self.leave_out = [int(i) for i in self.leave_out]
        # else:
        #     self.leave_out = []

        # Load MLM adapter with head
        self.model.load_adapter(f"{self.hparams['saved_adapter_dir']}/{self.hparams['domain_adapter_name']}", with_head=True)

        # Add classification head for the task
        self.model.add_classification_head(f"{self.hparams['task_adapter_name']}", num_labels=self.hparams["num_classes"])

        # Set active adapters
        self.model.train_adapter(self.hparams['domain_adapter_name'])
        # Calculate alpha based on dataset lengths
        self.alpha = source_dataset_length / (source_dataset_length + target_dataset_length)

        # Initialize loss functions and metrics
        self.criterion = nn.CrossEntropyLoss()
        self.mlm_criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="weighted")
        self.f1_macro = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="macro")
        self.softmax = nn.Softmax(dim=1)
        self.validation_outputs = []
        self.test_outputs = []
        # Optimizer related variables
        self.learning_rate = self.hparams.get("learning_rate", 1e-4)
        self.scheduler_factor = self.hparams.get("scheduler_factor", 0.1)
        self.scheduler_patience = self.hparams.get("scheduler_patience", 0.05)
        self.scheduler_threshold = self.hparams.get("scheduler_threshold", 0.0001)
        self.scheduler_cooldown = self.hparams.get("scheduler_cooldown", 0)
        self.scheduler_eps = self.hparams.get("scheduler_eps", 1e-8)

    def forward(self, input_ids, attention_mask=None, labels=None, task=None):
        if task == "mlm":
            self.model.active_head= self.hparams['domain_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        elif task == "classification":
            self.model.active_head= self.hparams['task_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        else:
            raise ValueError("Task must be either 'mlm' or 'classification'.")
        return outputs

    def training_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha

        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))

        metrics = {
            "train/accuracy": accuracy,
            "train/f1": f1,
            "train/taskclf_loss": task_loss,
            "train/loss": loss,
            "train/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return loss

    def validation_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha
        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        self.validation_outputs.append({
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
                })
        
        metrics = {
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return metrics
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        outputs= self.validation_outputs

        avg_loss = torch.stack([x["val/loss"] for x in outputs]).mean()
        avg_task_loss = torch.stack([x["val/taskclf_loss"] for x in outputs]).mean()
        avg_mlm_loss = torch.stack([x["val/mlm_loss"] for x in outputs]).mean()
        avg_accuracy = torch.stack([x["val/accuracy"] for x in outputs]).mean()
        avg_f1 = torch.stack([x["val/f1"] for x in outputs]).mean()
        print(f"val/accuracy: {avg_accuracy}")
        print(f"val/f1: {avg_f1}")
        print(f"val/taskclf_loss: {avg_task_loss}")
        print(f"val/loss: {avg_loss}")
        print(f"val/mlm_loss: {avg_mlm_loss}")
        metrics = {
            "val/avg_loss": avg_loss,
            "val/avg_taskclf_loss": avg_task_loss,
            "val/avg_mlm_loss": avg_mlm_loss,
            "val/avg_accuracy": avg_accuracy,
            "val/avg_f1": avg_f1,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
        self.log("val_loss", avg_loss)

    def test_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        target_labels = batch["label_target"]

        # Classification task for source data
        cls_outputs_source = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits_source = cls_outputs_source.logits
        task_loss_source = self.criterion(cls_logits_source, source_labels)

        # Classification task for target data
        cls_outputs_target = self(input_ids=target_input_ids, attention_mask=target_attention_mask, task="classification")
        cls_logits_target = cls_outputs_target.logits
        task_loss_target = self.criterion(cls_logits_target, target_labels)

        # Combine losses (though typically you would evaluate them separately)
        loss = task_loss_source + task_loss_target

        accuracy_source = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_source = self.f1(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_macro_source = self.f1_macro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))

        accuracy_target = self.accuracy(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_target = self.f1(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_macro_target = self.f1_macro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))

        metrics = {
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        self.test_outputs.append({
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
        })
        return metrics
    def on_test_epoch_start(self):
        self.test_outputs = []

    def on_test_epoch_end(self):
        outputs=  self.test_outputs

        avg_loss_source = torch.stack([x["source_test/loss"] for x in outputs]).mean()
        avg_task_loss_target = torch.stack([x["target_test/loss"] for x in outputs]).mean()
        avg_accuracy_source = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
        avg_f1_source = torch.stack([x["source_test/f1"] for x in outputs]).mean()
        avg_f1_macro_source = torch.stack([x["source_test/f1_macro"] for x in outputs]).mean()

        avg_accuracy_target = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
        avg_f1_target = torch.stack([x["target_test/f1"] for x in outputs]).mean()
        avg_f1_macro_target = torch.stack([x["target_test/f1_macro"] for x in outputs]).mean()

        metrics = {
            "source_test/loss": avg_loss_source,
            "target_test/loss": avg_task_loss_target,
            "source_test/accuracy": avg_accuracy_source,
            "source_test/f1": avg_f1_source,
            "source_test/f1_macro": avg_f1_macro_source,
            "target_test/accuracy": avg_accuracy_target,
            "target_test/f1": avg_f1_target,
            "target_test/f1_macro": avg_f1_macro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.learning_rate)
        lr_scheduler = ReduceLROnPlateau(
            optimizer=optimizer,
            mode="min",
            factor=self.scheduler_factor,
            patience=self.scheduler_patience,
            threshold=self.scheduler_threshold,
            cooldown=self.scheduler_cooldown,
            eps=self.scheduler_eps,
            verbose=True,
        )
        return [optimizer], [{"scheduler": lr_scheduler, "reduce_lr_on_plateau": True, "monitor": "val_loss", "interval": "epoch"}]


In [3]:
import wandb

wandb.login()
# Wandb setup and training loop
seeds = [42, 10, 100]  # List of seeds
project_name = 'mixed_edited'  # Replace with your wandb project name
domain = 'STR'  # Replace with the specific domain for this notebook
type = 'inv'  # Replace with the specific type for this notebook

# Initialize results dictionary
results = {
    "last_epoch": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
    },
    "best_model": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
    },
    "epoch_saved": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""

[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
reload(processed)
for seed in seeds:
    wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "slate_travel",
            "source_domain": "slate",
            "target_domain": "travel",
            "domain_adapter_name": "mlm_inv_TR",
            "task_adapter_name": "task_STR",
            "pretrained_model_name": "bert-base-uncased",
            "padding": "max_length",
            "max_seq_length": 128,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 6  # Save model at the 3rd epoch
        
        dm = processed.DataModuleSourceTargetMixed(hparams)
        dm.setup('fit')
        dm.setup("test")
        source_length, target_length = dm.get_dataset_lengths()
        print(f"Source dataset length: {source_length}")
        print(f"Target dataset length: {target_length}")
        model = JointDomainTaskAdapter(hparams,source_length,target_length)

        checkpoint_callback = ModelCheckpoint(
            filename="task-STR-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
            filename="STR-{epoch:02d}",
            every_n_epochs=save_epoch_3,
            save_top_k=-1,
        )

        wandb_logger = WandbLogger()

    except Exception as e:
        print(f"Error during preprocessing : {e}")

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=10,
            accelerator="auto",
            precision=16,
            
            default_root_dir="checkpoints",
            logger=wandb_logger,
            callbacks=[checkpoint_callback, save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
        )

        trainer.fit(model, train_loader, val_loader)
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")

        best_model = JointDomainTaskAdapter.load_from_checkpoint(best_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = JointDomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)

    except Exception as e:
        print(f"Error during testing: {e}")

    wandb.finish()

[34m[1mwandb[0m: wandb version 0.17.8 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[34m[1mwandb[0m: Tracking run with wandb version 0.17.0


[34m[1mwandb[0m: Run data is saved locally in [35m[1m/home/guest/Desktop/projects/third-experiments/domain_adaptation_project/mixed/mixed/wandb/run-20240830_004233-w1877ihb[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.


[34m[1mwandb[0m: Syncing run [33mSTR_inv_run_with_seed_42[0m


[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/mrawhani/mixed_edited[0m


[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/mrawhani/mixed_edited/runs/w1877ihb[0m


Seed set to 42




Batch size: 32


Source genre: slate
Target genre: travel
Number of target samples: 69615


Source genre: slate
Target genre: travel
Number of target samples: 69615


Source dataset length: 69575
Target dataset length: 24519


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


eee


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py:257: Found unsupported keys in the lr scheduler dict: {'reduce_lr_on_plateau'}. HINT: remove them from the output of `configure_optimizers`.

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 118 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.2 M     Trainable params
109 M     Non-trainable params
118 M     Total params
474.833   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.265625
val/f1: 0.3452845811843872
val/taskclf_loss: 1.1042633056640625
val/loss: 1.2570621967315674
val/mlm_loss: 1.6906431913375854


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7245473265647888
val/f1: 0.7256537675857544
val/taskclf_loss: 0.6466049551963806
val/loss: 0.8874913454055786
val/mlm_loss: 1.5710291862487793


Validation: |                                                                                                 …

val/accuracy: 0.7470570802688599
val/f1: 0.7473435401916504
val/taskclf_loss: 0.6137298941612244
val/loss: 0.8597187399864197
val/mlm_loss: 1.5577354431152344


Validation: |                                                                                                 …

val/accuracy: 0.7453376054763794
val/f1: 0.7452155947685242
val/taskclf_loss: 0.6260339617729187
val/loss: 0.8660997748374939
val/mlm_loss: 1.5473092794418335


Validation: |                                                                                                 …

val/accuracy: 0.755151629447937
val/f1: 0.7557623386383057
val/taskclf_loss: 0.6921623349189758
val/loss: 0.9094310402870178
val/mlm_loss: 1.5259515047073364


Validation: |                                                                                                 …

val/accuracy: 0.7589440941810608
val/f1: 0.7587811946868896
val/taskclf_loss: 0.6764888167381287
val/loss: 0.9017810225486755
val/mlm_loss: 1.5410691499710083


Validation: |                                                                                                 …

val/accuracy: 0.7592023611068726
val/f1: 0.7591233253479004
val/taskclf_loss: 0.6739057302474976
val/loss: 0.8986459374427795
val/mlm_loss: 1.5363677740097046


Validation: |                                                                                                 …

val/accuracy: 0.759331464767456
val/f1: 0.7591620683670044
val/taskclf_loss: 0.6736086010932922
val/loss: 0.8990622758865356
val/mlm_loss: 1.5388084650039673


Validation: |                                                                                                 …

val/accuracy: 0.759331464767456
val/f1: 0.7591984868049622
val/taskclf_loss: 0.6733314394950867
val/loss: 0.8994597792625427
val/mlm_loss: 1.541120171546936


Validation: |                                                                                                 …

val/accuracy: 0.759331464767456
val/f1: 0.7591322660446167
val/taskclf_loss: 0.6731385588645935
val/loss: 0.8977518081665039
val/mlm_loss: 1.5351133346557617


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.7594606280326843
val/f1: 0.7592366933822632
val/taskclf_loss: 0.6729351878166199
val/loss: 0.8971433639526367
val/mlm_loss: 1.5333552360534668


Best checkpoint path: ./lightning_logs/w1877ihb/checkpoints/task-STR-epoch=01-val_loss=0.86.ckpt
Saved epoch checkpoint path: ./lightning_logs/w1877ihb/checkpoints/STR-epoch=05.ckpt


Source genre: slate
Target genre: travel
Number of target samples: 69615


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.7335346937179565, 'source_test/accuracy': 0.7464717626571655, 'source_test/f1': 0.7459375262260437, 'source_test/f1_macro': 0.7360235452651978, 'target_test/loss': 0.6007658243179321, 'target_test/accuracy': 0.7758736610412598, 'target_test/f1': 0.7759540677070618, 'target_test/f1_macro': 0.7660940885543823}]
Best checkpoint path: ./lightning_logs/w1877ihb/checkpoints/task-STR-epoch=01-val_loss=0.86.ckpt
Saved epoch checkpoint path: ./lightning_logs/w1877ihb/checkpoints/STR-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.6768397688865662, 'source_test/accuracy': 0.7236222624778748, 'source_test/f1': 0.72342848777771, 'source_test/f1_macro': 0.7134289145469666, 'target_test/loss': 0.5580638647079468, 'target_test/accuracy': 0.769825279712677, 'target_test/f1': 0.7702353596687317, 'target_test/f1_macro': 0.7605107426643372}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on saved epoch: [{'source_test/loss': 0.7348485589027405, 'source_test/accuracy': 0.7454636693000793, 'source_test/f1': 0.745010495185852, 'source_test/f1_macro': 0.7351593971252441, 'target_test/loss': 0.6019358038902283, 'target_test/accuracy': 0.7743615508079529, 'target_test/f1': 0.7743772268295288, 'target_test/f1_macro': 0.764814555644989}]


[34m[1mwandb[0m: - 0.003 MB of 0.003 MB uploaded

[34m[1mwandb[0m: \ 0.003 MB of 0.016 MB uploaded

[34m[1mwandb[0m: | 0.003 MB of 0.016 MB uploaded

[34m[1mwandb[0m: / 0.016 MB of 0.016 MB uploaded

[34m[1mwandb[0m:                                                                                


[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:                epoch ▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█
[34m[1mwandb[0m: source_test/accuracy █▁█
[34m[1mwandb[0m:       source_test/f1 █▁█
[34m[1mwandb[0m: source_test/f1_macro █▁█
[34m[1mwandb[0m:     source_test/loss █▁█
[34m[1mwandb[0m: target_test/accuracy █▁▆
[34m[1mwandb[0m:       target_test/f1 █▁▆
[34m[1mwandb[0m: target_test/f1_macro █▁▆
[34m[1mwandb[0m:     target_test/loss █▁█
[34m[1mwandb[0m:       train/accuracy ▁▄▃▃▃▅▃▅▄▇▄▄▆▆▃▆▆▇▅▅▅▆▄▇▆▇▄█▇▆▅▆▂▅█▇▂▄▇█
[34m[1mwandb[0m:             train/f1 ▁▄▃▃▃▅▃▅▄▇▄▄▆▆▃▆▆▇▅▅▅▆▄▇▆▇▄█▇▆▅▆▂▅█▇▃▄▇█
[34m[1mwandb[0m:           train/loss █▆▆▇▆▄▆▅▆▄▆▅▃▃▅▃▃▂▃▅▄▄▆▂▃▂▄▃▄▃▄▅▇▂▁▂▆▅▁▂
[34m[1mwandb[0m:       train/mlm_loss ▅▆▅▅▄▅▆▇▇▅▆▄▂▄▃▅▅▄▅▃▄▅▅▄▅▄▇▆▆▆▆█▅▁▂▂▆▆▅▄
[34m[1mwandb[0m:   train/taskclf_loss █▆▆▇▆▄▅▄▅▃▆▅▄▃▅▃▂▂▃▅▄▃▆▂▃▂▃▂▃▃▄▄▇▃▁▂▆▅▁▂
[34m[1mwandb[0m:  trainer/global_step ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
[34m[1mwan

[34m[1mwandb[0m: 🚀 View run [33mSTR_inv_run_with_seed_42[0m at: [34m[4mhttps://wandb.ai/mrawhani/mixed_edited/runs/w1877ihb[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/mrawhani/mixed_edited[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)


[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20240830_004233-w1877ihb/logs[0m


[34m[1mwandb[0m: - Waiting for wandb.init()...

[34m[1mwandb[0m: \ Waiting for wandb.init()...

[34m[1mwandb[0m: wandb version 0.17.8 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[34m[1mwandb[0m: Tracking run with wandb version 0.17.0


[34m[1mwandb[0m: Run data is saved locally in [35m[1m/home/guest/Desktop/projects/third-experiments/domain_adaptation_project/mixed/mixed/wandb/run-20240830_014722-856xrqkt[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.


[34m[1mwandb[0m: Syncing run [33mSTR_inv_run_with_seed_10[0m


[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/mrawhani/mixed_edited[0m


[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/mrawhani/mixed_edited/runs/856xrqkt[0m


Seed set to 10




Batch size: 32


Source genre: slate
Target genre: travel
Number of target samples: 69615


Source genre: slate


Target genre: travel
Number of target samples: 69615
Source dataset length: 69575
Target dataset length: 24519


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py:257: Found unsupported keys in the lr scheduler dict: {'reduce_lr_on_plateau'}. HINT: remove them from the output of `configure_optimizers`.

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 118 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.2 M     Trainable params
109 M     Non-trainable params
118 M     Total params
474.833   Total estimated model params size (MB)


eee


Sanity Checking: |                                                                                            …

val/accuracy: 0.28125
val/f1: 0.34888407588005066
val/taskclf_loss: 1.1057662963867188
val/loss: 1.2457606792449951
val/mlm_loss: 1.643007755279541


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.731051504611969
val/f1: 0.7314310669898987
val/taskclf_loss: 0.6418179273605347
val/loss: 0.8852640986442566
val/mlm_loss: 1.5760657787322998


Validation: |                                                                                                 …

val/accuracy: 0.7449094653129578
val/f1: 0.7449025511741638
val/taskclf_loss: 0.627510666847229
val/loss: 0.8689614534378052
val/mlm_loss: 1.5541009902954102


Validation: |                                                                                                 …

val/accuracy: 0.7465881705284119
val/f1: 0.7471816539764404
val/taskclf_loss: 0.6343274712562561
val/loss: 0.8725326061248779
val/mlm_loss: 1.5484623908996582


Validation: |                                                                                                 …

val/accuracy: 0.7564022541046143
val/f1: 0.7563953399658203
val/taskclf_loss: 0.693311870098114
val/loss: 0.9147379994392395
val/mlm_loss: 1.5430556535720825


Validation: |                                                                                                 …

val/accuracy: 0.7585566639900208
val/f1: 0.7587240934371948
val/taskclf_loss: 0.6799941658973694
val/loss: 0.9043495059013367
val/mlm_loss: 1.540979027748108


Validation: |                                                                                                 …

val/accuracy: 0.7592023611068726
val/f1: 0.7594632506370544
val/taskclf_loss: 0.6774413585662842
val/loss: 0.898950457572937
val/mlm_loss: 1.5275038480758667


Validation: |                                                                                                 …

val/accuracy: 0.7592023611068726
val/f1: 0.7594868540763855
val/taskclf_loss: 0.6771866679191589
val/loss: 0.9025968909263611
val/mlm_loss: 1.5422194004058838


Validation: |                                                                                                 …

val/accuracy: 0.7598479986190796
val/f1: 0.7601372003555298
val/taskclf_loss: 0.6769354939460754
val/loss: 0.901526153087616
val/mlm_loss: 1.5388233661651611


Validation: |                                                                                                 …

val/accuracy: 0.7597188949584961
val/f1: 0.7599970102310181
val/taskclf_loss: 0.6767580509185791
val/loss: 0.9017332792282104
val/mlm_loss: 1.5401220321655273


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.7597188949584961
val/f1: 0.7599970102310181
val/taskclf_loss: 0.6765585541725159
val/loss: 0.9035230278968811
val/mlm_loss: 1.5475564002990723


Best checkpoint path: ./lightning_logs/856xrqkt/checkpoints/task-STR-epoch=01-val_loss=0.87.ckpt
Saved epoch checkpoint path: ./lightning_logs/856xrqkt/checkpoints/STR-epoch=05.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Source genre: slate
Target genre: travel
Number of target samples: 69615


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.7133637070655823, 'source_test/accuracy': 0.7513440251350403, 'source_test/f1': 0.7517113089561462, 'source_test/f1_macro': 0.7410829663276672, 'target_test/loss': 0.6011308431625366, 'target_test/accuracy': 0.7787297964096069, 'target_test/f1': 0.7792301774024963, 'target_test/f1_macro': 0.7676406502723694}]
Best checkpoint path: ./lightning_logs/856xrqkt/checkpoints/task-STR-epoch=01-val_loss=0.87.ckpt
Saved epoch checkpoint path: ./lightning_logs/856xrqkt/checkpoints/STR-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.68798828125, 'source_test/accuracy': 0.7224462628364563, 'source_test/f1': 0.7220959663391113, 'source_test/f1_macro': 0.7109693288803101, 'target_test/loss': 0.5704085826873779, 'target_test/accuracy': 0.7649528980255127, 'target_test/f1': 0.7653031945228577, 'target_test/f1_macro': 0.7558027505874634}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on saved epoch: [{'source_test/loss': 0.7146836519241333, 'source_test/accuracy': 0.7508400082588196, 'source_test/f1': 0.7510698437690735, 'source_test/f1_macro': 0.7405137419700623, 'target_test/loss': 0.6023080945014954, 'target_test/accuracy': 0.7777217626571655, 'target_test/f1': 0.7781782746315002, 'target_test/f1_macro': 0.7667186856269836}]


[34m[1mwandb[0m: - 0.002 MB of 0.002 MB uploaded

[34m[1mwandb[0m: \ 0.002 MB of 0.016 MB uploaded

[34m[1mwandb[0m: | 0.002 MB of 0.016 MB uploaded

[34m[1mwandb[0m: / 0.016 MB of 0.016 MB uploaded

[34m[1mwandb[0m:                                                                                


[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:                epoch ▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█
[34m[1mwandb[0m: source_test/accuracy █▁█
[34m[1mwandb[0m:       source_test/f1 █▁█
[34m[1mwandb[0m: source_test/f1_macro █▁█
[34m[1mwandb[0m:     source_test/loss █▁█
[34m[1mwandb[0m: target_test/accuracy █▁▇
[34m[1mwandb[0m:       target_test/f1 █▁▇
[34m[1mwandb[0m: target_test/f1_macro █▁▇
[34m[1mwandb[0m:     target_test/loss █▁█
[34m[1mwandb[0m:       train/accuracy ▂▄▃▃▁▃▄▃▅▇▄▅▅▆▃▅▇▆▆▅▅▆▃▅▅▆▃█▅▆▇▇▃▆▇▆▄▅█▇
[34m[1mwandb[0m:             train/f1 ▃▄▄▃▁▄▄▃▅▇▄▅▅▆▃▅▇▆▆▅▅▆▃▅▅▆▃█▅▆▇▇▃▆▇▆▄▅█▇
[34m[1mwandb[0m:           train/loss █▅▄▇▆▄▄▆▄▂▅▅▃▃▅▃▁▁▂▃▃▂▇▂▅▂▅▂▃▂▂▃▆▄▁▁▅▄▁▂
[34m[1mwandb[0m:       train/mlm_loss ▄▅▂▄▁▂▂█▁▃▆▄▄▃▅▁▃▂▃▁▁▂▃▄▄▄▆▅▂▃▅▄▅▄▄▃▄▆▅▄
[34m[1mwandb[0m:   train/taskclf_loss █▅▅▇▇▅▅▅▅▃▅▅▃▄▅▄▂▂▂▄▄▂▇▂▅▂▄▂▄▃▂▃▆▄▂▂▅▃▁▂
[34m[1mwandb[0m:  trainer/global_step ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
[34m[1mwan

[34m[1mwandb[0m: 🚀 View run [33mSTR_inv_run_with_seed_10[0m at: [34m[4mhttps://wandb.ai/mrawhani/mixed_edited/runs/856xrqkt[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/mrawhani/mixed_edited[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)


[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20240830_014722-856xrqkt/logs[0m


[34m[1mwandb[0m: - Waiting for wandb.init()...

[34m[1mwandb[0m: \ Waiting for wandb.init()...

[34m[1mwandb[0m: wandb version 0.17.8 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[34m[1mwandb[0m: Tracking run with wandb version 0.17.0


[34m[1mwandb[0m: Run data is saved locally in [35m[1m/home/guest/Desktop/projects/third-experiments/domain_adaptation_project/mixed/mixed/wandb/run-20240830_025217-qunufo60[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.


[34m[1mwandb[0m: Syncing run [33mSTR_inv_run_with_seed_100[0m


[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/mrawhani/mixed_edited[0m


[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/mrawhani/mixed_edited/runs/qunufo60[0m


Seed set to 100




Batch size: 32


Source genre: slate
Target genre: travel
Number of target samples: 69615


Source genre: slate
Target genre: travel
Number of target samples: 69615
Source dataset length: 69575
Target dataset length: 24519


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py:257: Found unsupported keys in the lr scheduler dict: {'reduce_lr_on_plateau'}. HINT: remove them from the output of `configure_optimizers`.

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 118 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | softmax       | Softmax            | 0     
-----------------------------------------------------
9.2 M     Trainable params
109 M     Non-trainable params
118 M     Total params
474.833   Total estimated model params size (MB)


eee


Sanity Checking: |                                                                                            …

val/accuracy: 0.265625
val/f1: 0.38245612382888794
val/taskclf_loss: 1.12371826171875
val/loss: 1.2546296119689941
val/mlm_loss: 1.6261029243469238


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7278640270233154
val/f1: 0.7286763787269592
val/taskclf_loss: 0.6458033919334412
val/loss: 0.8882117867469788
val/mlm_loss: 1.5760685205459595


Validation: |                                                                                                 …

val/accuracy: 0.7445628046989441
val/f1: 0.7456532120704651
val/taskclf_loss: 0.6187389492988586
val/loss: 0.8667240142822266
val/mlm_loss: 1.5704050064086914


Validation: |                                                                                                 …

val/accuracy: 0.7497348785400391
val/f1: 0.7506481409072876
val/taskclf_loss: 0.6193493008613586
val/loss: 0.8604008555412292
val/mlm_loss: 1.5444071292877197


Validation: |                                                                                                 …

val/accuracy: 0.7505096793174744
val/f1: 0.7518508434295654
val/taskclf_loss: 0.6539692878723145
val/loss: 0.8878129720687866
val/mlm_loss: 1.5513668060302734


Validation: |                                                                                                 …

val/accuracy: 0.761655867099762
val/f1: 0.7620723247528076
val/taskclf_loss: 0.7463045716285706
val/loss: 0.9509256482124329
val/mlm_loss: 1.5315574407577515


Validation: |                                                                                                 …

val/accuracy: 0.7633345723152161
val/f1: 0.7634052634239197
val/taskclf_loss: 0.7235642671585083
val/loss: 0.9368677139282227
val/mlm_loss: 1.5421364307403564


Validation: |                                                                                                 …

val/accuracy: 0.7630763053894043
val/f1: 0.7631457448005676
val/taskclf_loss: 0.7214900851249695
val/loss: 0.9333357214927673
val/mlm_loss: 1.5344678163528442


Validation: |                                                                                                 …

val/accuracy: 0.7630763053894043
val/f1: 0.7631497383117676
val/taskclf_loss: 0.7212353944778442
val/loss: 0.9365770220756531
val/mlm_loss: 1.5476289987564087


Validation: |                                                                                                 …

val/accuracy: 0.7628180384635925
val/f1: 0.7628856301307678
val/taskclf_loss: 0.7210573554039001
val/loss: 0.9308145642280579
val/mlm_loss: 1.5260205268859863


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.7625597715377808
val/f1: 0.7626314759254456
val/taskclf_loss: 0.7208372950553894
val/loss: 0.9304618239402771
val/mlm_loss: 1.5252914428710938


Best checkpoint path: ./lightning_logs/qunufo60/checkpoints/task-STR-epoch=02-val_loss=0.86.ckpt
Saved epoch checkpoint path: ./lightning_logs/qunufo60/checkpoints/STR-epoch=05.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Source genre: slate
Target genre: travel
Number of target samples: 69615


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.7625701427459717, 'source_test/accuracy': 0.7523521184921265, 'source_test/f1': 0.7514972686767578, 'source_test/f1_macro': 0.7400442361831665, 'target_test/loss': 0.6263136863708496, 'target_test/accuracy': 0.7881383895874023, 'target_test/f1': 0.7879767417907715, 'target_test/f1_macro': 0.7789142727851868}]
Best checkpoint path: ./lightning_logs/qunufo60/checkpoints/task-STR-epoch=02-val_loss=0.86.ckpt
Saved epoch checkpoint path: ./lightning_logs/qunufo60/checkpoints/STR-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.6634132862091064, 'source_test/accuracy': 0.741767406463623, 'source_test/f1': 0.7424407601356506, 'source_test/f1_macro': 0.7284212112426758, 'target_test/loss': 0.5529816746711731, 'target_test/accuracy': 0.785618245601654, 'target_test/f1': 0.7873944640159607, 'target_test/f1_macro': 0.7748045921325684}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on saved epoch: [{'source_test/loss': 0.766530454158783, 'source_test/accuracy': 0.7528561353683472, 'source_test/f1': 0.7519171237945557, 'source_test/f1_macro': 0.7406654357910156, 'target_test/loss': 0.6293236613273621, 'target_test/accuracy': 0.788474440574646, 'target_test/f1': 0.7882111668586731, 'target_test/f1_macro': 0.7798805236816406}]


[34m[1mwandb[0m: - 0.003 MB of 0.003 MB uploaded

[34m[1mwandb[0m: \ 0.003 MB of 0.005 MB uploaded

[34m[1mwandb[0m: | 0.016 MB of 0.016 MB uploaded

[34m[1mwandb[0m: / 0.016 MB of 0.016 MB uploaded

[34m[1mwandb[0m: - 0.016 MB of 0.016 MB uploaded

[34m[1mwandb[0m:                                                                                


[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:                epoch ▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█
[34m[1mwandb[0m: source_test/accuracy █▁█
[34m[1mwandb[0m:       source_test/f1 █▁█
[34m[1mwandb[0m: source_test/f1_macro █▁█
[34m[1mwandb[0m:     source_test/loss █▁█
[34m[1mwandb[0m: target_test/accuracy ▇▁█
[34m[1mwandb[0m:       target_test/f1 ▆▁█
[34m[1mwandb[0m: target_test/f1_macro ▇▁█
[34m[1mwandb[0m:     target_test/loss █▁█
[34m[1mwandb[0m:       train/accuracy ▃▅▂▃▂▅▄▃▄▅▄▄▄▅▂▁▆▇▆▆▆▅▄▇▅▆▄█▆▆▆▆▃▆█▇▅▆▇▆
[34m[1mwandb[0m:             train/f1 ▃▅▂▃▂▅▄▃▄▅▄▄▄▅▂▁▆▇▆▆▆▅▄▇▅▆▄█▆▆▆▆▃▆█▇▅▆▇▆
[34m[1mwandb[0m:           train/loss ▇▄▆▇▇▅▅▅▆▄▅▅▅▄██▃▃▃▄▄▃▆▂▃▃▆▁▄▂▂▃▆▄▁▂▅▄▁▃
[34m[1mwandb[0m:       train/mlm_loss ▄▃▄▄▄▃▅▄▃▄▂▃▃▄▄▅▁▄▄▄▃▃▄▄▁▂█▃▃▃▃▄▃▄▄▃▅▅▂▃
[34m[1mwandb[0m:   train/taskclf_loss █▅▆█▇▅▅▅▆▄▅▆▅▄██▃▂▃▄▄▃▆▂▄▃▅▁▄▂▃▃▇▄▁▂▅▄▁▃
[34m[1mwandb[0m:  trainer/global_step ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
[34m[1mwan

[34m[1mwandb[0m: 🚀 View run [33mSTR_inv_run_with_seed_100[0m at: [34m[4mhttps://wandb.ai/mrawhani/mixed_edited/runs/qunufo60[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/mrawhani/mixed_edited[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)


[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20240830_025217-qunufo60/logs[0m


In [5]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.7335346937179565, 0.7133637070655823, 0.7625701427459717], 'source_test/accuracy': [0.7464717626571655, 0.7513440251350403, 0.7523521184921265], 'source_test/f1': [0.7459375262260437, 0.7517113089561462, 0.7514972686767578], 'source_test/f1_macro': [0.7360235452651978, 0.7410829663276672, 0.7400442361831665], 'target_test/loss': [0.6007658243179321, 0.6011308431625366, 0.6263136863708496], 'target_test/accuracy': [0.7758736610412598, 0.7787297964096069, 0.7881383895874023], 'target_test/f1': [0.7759540677070618, 0.7792301774024963, 0.7879767417907715], 'target_test/f1_macro': [0.7660940885543823, 0.7676406502723694, 0.7789142727851868]}), ('best_model', {'source_test/loss': [0.6768397688865662, 0.68798828125, 0.6634132862091064], 'source_test/accuracy': [0.7236222624778748, 0.7224462628364563, 0.741767406463623], 'source_test/f1': [0.72342848777771, 0.7220959663391113, 0.7424407601356506], 'source_test/f1_macro': [0.7134289145469666, 0

In [6]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
wandb.init(project=project_name, name=f'{domain}_mean_results')
for scenario in mean_results:
    for key, value in mean_results[scenario].items():
        wandb.log({f"{scenario}/{key}": value})
        wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

[34m[1mwandb[0m: - Waiting for wandb.init()...

[34m[1mwandb[0m: \ Waiting for wandb.init()...

[34m[1mwandb[0m: wandb version 0.17.8 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[34m[1mwandb[0m: Tracking run with wandb version 0.17.0


[34m[1mwandb[0m: Run data is saved locally in [35m[1m/home/guest/Desktop/projects/third-experiments/domain_adaptation_project/mixed/mixed/wandb/run-20240830_035728-3pk4fjt7[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.


[34m[1mwandb[0m: Syncing run [33mSTR_mean_results[0m


[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/mrawhani/mixed_edited[0m


[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/mrawhani/mixed_edited/runs/3pk4fjt7[0m


[34m[1mwandb[0m: - 0.010 MB of 0.010 MB uploaded

[34m[1mwandb[0m: \ 0.010 MB of 0.010 MB uploaded

[34m[1mwandb[0m: | 0.010 MB of 0.010 MB uploaded

[34m[1mwandb[0m:                                                                                


[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:      best_model/source_test/accuracy ▁
[34m[1mwandb[0m:  best_model/source_test/accuracy_std ▁
[34m[1mwandb[0m:            best_model/source_test/f1 ▁
[34m[1mwandb[0m:      best_model/source_test/f1_macro ▁
[34m[1mwandb[0m:  best_model/source_test/f1_macro_std ▁
[34m[1mwandb[0m:        best_model/source_test/f1_std ▁
[34m[1mwandb[0m:          best_model/source_test/loss ▁
[34m[1mwandb[0m:      best_model/source_test/loss_std ▁
[34m[1mwandb[0m:      best_model/target_test/accuracy ▁
[34m[1mwandb[0m:  best_model/target_test/accuracy_std ▁
[34m[1mwandb[0m:            best_model/target_test/f1 ▁
[34m[1mwandb[0m:      best_model/target_test/f1_macro ▁
[34m[1mwandb[0m:  best_model/target_test/f1_macro_std ▁
[34m[1mwandb[0m:        best_model/target_test/f1_std ▁
[34m[1mwandb[0m:          best_model/target_test/loss ▁
[34m[1mwandb[0m:      best_model/target_test/loss_std ▁
[

[34m[1mwandb[0m: 🚀 View run [33mSTR_mean_results[0m at: [34m[4mhttps://wandb.ai/mrawhani/mixed_edited/runs/3pk4fjt7[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/mrawhani/mixed_edited[0m
[34m[1mwandb[0m: Synced 4 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)


[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20240830_035728-3pk4fjt7/logs[0m


Mean Results: {'last_epoch': {'source_test/loss': 0.7364895145098368, 'source_test/accuracy': 0.7500559687614441, 'source_test/f1': 0.7497153679529825, 'source_test/f1_macro': 0.7390502492586771, 'target_test/loss': 0.6094034512837728, 'target_test/accuracy': 0.7809139490127563, 'target_test/f1': 0.7810536623001099, 'target_test/f1_macro': 0.7708830038706461}, 'best_model': {'source_test/loss': 0.6760804454485575, 'source_test/accuracy': 0.7292786439259847, 'source_test/f1': 0.7293217380841573, 'source_test/f1_macro': 0.7176064848899841, 'target_test/loss': 0.5604847073554993, 'target_test/accuracy': 0.7734654744466146, 'target_test/f1': 0.7743110060691833, 'target_test/f1_macro': 0.7637060284614563}, 'epoch_saved': {'source_test/loss': 0.7386875549952189, 'source_test/accuracy': 0.7497199376424154, 'source_test/f1': 0.7493324875831604, 'source_test/f1_macro': 0.7387795249621073, 'target_test/loss': 0.6111891865730286, 'target_test/accuracy': 0.7801859180132548, 'target_test/f1': 0.780

In [7]:
print('dones')

dones


In [8]:
best_val_loss

inf