In [17]:

import torch
import gc
torch.cuda.empty_cache()
gc.collect()


19274

In [18]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig, get_cosine_schedule_with_warmup,BertForSequenceClassification
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import torchmetrics
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np

install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)



['/home/guest/Desktop/projects/third-experiments/SDA_experiments/mlm', '/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages', '/tmp/tmpdoj1tcrw', '/home/guest/Desktop/projects/third-experiments/SDA_experiments/modules']
./text-files/
./hp-model-


In [19]:
from typing import Optional, Dict, Any
from rich.traceback import install
import ntpath
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import Dataset, DataLoader

# Install rich traceback
install(show_locals=True)

class SADataModuleSourceTarget(pl.LightningDataModule):
    def __init__(self, hparams: Dict[str, Any]):
        super(SADataModuleSourceTarget, self).__init__()
        os.environ["TOKENIZERS_PARALLELISM"] = "True"
        self.dataset_cache_dir = hparams["dataset_cache_dir"]
        self.source_target = hparams["source_target"]
        self.pretrained_model_name = hparams["pretrained_model_name"]
        self.padding = hparams["padding"]
        self.max_seq_length = hparams["max_seq_length"]
        self.batch_size = hparams["bsz"]
        self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model_name, use_fast=True)
        self.train_dataset = None
        self.val_dataset = None
        self.test_dataset = None

    def prepare_data(self):
        SourceTargetDataset(
            source_filepath=os.path.join(self.dataset_cache_dir, "sa", self.source_target, "train_source.csv"),
            target_filepath=os.path.join(self.dataset_cache_dir, "sa", self.source_target, "target_unlabelled.csv"),
            tokenizer=self.tokenizer,
            padding=self.padding,
            max_seq_length=self.max_seq_length,
        )
        SourceTargetDataset(
            source_filepath=os.path.join(self.dataset_cache_dir, "sa", self.source_target, "dev_source.csv"),
            target_filepath=os.path.join(self.dataset_cache_dir, "sa", self.source_target, "dev_target.csv"),
            tokenizer=self.tokenizer,
            padding=self.padding,
            max_seq_length=self.max_seq_length,
        )
        SourceTargetDataset(
            source_filepath=os.path.join(self.dataset_cache_dir, "sa", self.source_target, "test_source.csv"),
            target_filepath=os.path.join(self.dataset_cache_dir, "sa", self.source_target, "test_target.csv"),
            tokenizer=self.tokenizer,
            padding=self.padding,
            max_seq_length=self.max_seq_length,
        )

    def setup(self, stage: Optional[str] = None):
        train_dataset = SourceTargetDataset(
            source_filepath=os.path.join(self.dataset_cache_dir, "sa", self.source_target, "train_source.csv"),
            target_filepath=os.path.join(self.dataset_cache_dir, "sa", self.source_target, "target_unlabelled.csv"),
            tokenizer=self.tokenizer,
            padding=self.padding,
            max_seq_length=self.max_seq_length,
        )
        val_dataset = SourceTargetDataset(
            source_filepath=os.path.join(self.dataset_cache_dir, "sa", self.source_target, "dev_source.csv"),
            target_filepath=os.path.join(self.dataset_cache_dir, "sa", self.source_target, "dev_target.csv"),
            tokenizer=self.tokenizer,
            padding=self.padding,
            max_seq_length=self.max_seq_length,
        )
        test_dataset = SourceTargetDataset(
            source_filepath=os.path.join(self.dataset_cache_dir, "sa", self.source_target, "test_source.csv"),
            target_filepath=os.path.join(self.dataset_cache_dir, "sa", self.source_target, "test_target.csv"),
            tokenizer=self.tokenizer,
            padding=self.padding,
            max_seq_length=self.max_seq_length,
        )
        if stage == "fit":
            self.train_dataset = train_dataset
            self.val_dataset = val_dataset
        elif stage == "test":
            print('test')
            self.test_dataset = test_dataset

    def train_dataloader(self):
        print(f"Training Dataset: {len(self.train_dataset)} samples")
        return DataLoader(self.train_dataset, batch_size=self.batch_size, num_workers=16)

    def val_dataloader(self):
        print(f"Validation Dataset: {len(self.val_dataset)} samples")
        return DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=16)

    def test_dataloader(self):
        print(f"Test Dataset: {len(self.test_dataset)} samples")
        return DataLoader(self.test_dataset, batch_size=self.batch_size, num_workers=16)


class SourceTargetDataset(Dataset):
    def __init__(self, source_filepath, target_filepath, tokenizer, padding, max_seq_length):
        self.tokenizer = tokenizer
        self.max_seq_length = max_seq_length
        self.padding = padding
        self.source_df = pd.read_csv(source_filepath)
        self.target_df = pd.read_csv(target_filepath)
        self.target_filename = ntpath.basename(target_filepath)

    def __getitem__(self, index):
        sentence = self.source_df.iloc[index]["sentence"]
        label_source = self.source_df.iloc[index]["label"]

        encoded_input = self.tokenizer(
            str(sentence),
            max_length=self.max_seq_length,
            truncation=True,
            padding=self.padding,
        )
        source_input_ids = encoded_input["input_ids"]
        source_attention_mask = encoded_input["attention_mask"]

        sentence = self.target_df.iloc[index]["sentence"]
        encoded_input = self.tokenizer(
            str(sentence),
            max_length=self.max_seq_length,
            truncation=True,
            padding=self.padding,
        )
        target_input_ids = encoded_input["input_ids"]
        target_attention_mask = encoded_input["attention_mask"]
        if "unlabelled" not in self.target_filename:
            label_target = self.target_df.iloc[index]["label"]
            data_input = {
                "source_input_ids": torch.tensor(source_input_ids),
                "source_attention_mask": torch.tensor(source_attention_mask),
                "target_input_ids": torch.tensor(target_input_ids),
                "target_attention_mask": torch.tensor(target_attention_mask),
                "label_source": torch.tensor(label_source, dtype=torch.long),
                "label_target": torch.tensor(label_target, dtype=torch.long),
            }
        else:
            data_input = {
                "source_input_ids": torch.tensor(source_input_ids),
                "source_attention_mask": torch.tensor(source_attention_mask),
                "target_input_ids": torch.tensor(target_input_ids),
                "target_attention_mask": torch.tensor(target_attention_mask),
                "label_source": torch.tensor(label_source, dtype=torch.long),
            }

        return data_input

    def __len__(self):
        return min(self.source_df.shape[0], self.target_df.shape[0])



In [20]:
# Step 4: Define the DomainTaskAdapter class



class DomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams):
        super(DomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"],num_labels=self.hparams["num_classes"])
        self.config.output_hidden_states = True
        self.model = BertForSequenceClassification.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)
        
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        if self.reduction_factor == "None":
            self.reduction_factor = 16
        self.leave_out = self.hparams.get("leave_out", [])
       
        self.saved_adapter_dir = self.hparams["saved_adapter_dir"]
        self.domain_adapter_name = self.hparams["domain_adapter_name"]
        
        adapter_config = AdapterConfig.load("lora", r=8, alpha=16)
        
        self.task_adapter_name = self.hparams["task_adapter_name"]
        # self.model.add_adapter(self.task_adapter_name, config=adapter_config)

        # self.model.load_adapter(f"{self.saved_adapter_dir}/{self.domain_adapter_name}", with_head=False)
        # self.model.add_classification_head(self.task_adapter_name, num_labels=self.hparams["num_classes"])
        # self.model.active_adapters = Stack(self.domain_adapter_name, self.task_adapter_name)

        # self.model.train_adapter(Stack(self.domain_adapter_name, self.task_adapter_name))
        # print(self.model.adapter_summary())
        print(fn.print_trainable_parameters(self.model))

        self.training_outputs = []
        self.validation_outputs = []
        self.test_outputs = []
        self.criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass',                                           
                                     num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass',num_classes=self.hparams["num_classes"], average="macro")
        self.softmax = nn.Softmax(dim=1)
        self.entropy_values = []  # For entropy minimization
    def forward(self, input_ids, attention_mask):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.logits

    def training_step(self, batch, batch_idx):
        input_ids, attention_mask = batch["source_input_ids"], batch["source_attention_mask"]
        labels = batch["label_source"]
        logits = self(input_ids=input_ids, attention_mask=attention_mask)
        loss = self.criterion(logits, labels)
        accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))
        # self.training_outputs.append({
        #     "train_loss": loss,
        #     "train_accuracy":accuracy,
        #     "train_f1":f1,
        #     })
        self.log("train_loss", loss)
        self.log("train_accuracy", accuracy)
        self.log("train_f1", f1)
        
        return loss
   
    def validation_step(self, batch, batch_idx):
        """validation step of DomainTaskAdapter"""
        # get the input ids and attention mask for source data
        input_ids, attention_mask = batch["source_input_ids"], batch["source_attention_mask"]
        logits = self(input_ids=input_ids, attention_mask=attention_mask)
        labels = batch["label_source"]
        source_loss = self.criterion(logits, labels)
        source_accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        source_f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))


        # get the input ids and attention mask for target data
        input_ids, attention_mask = batch["target_input_ids"], batch["target_attention_mask"]
        logits  = self(input_ids=input_ids, attention_mask=attention_mask)
        labels = batch["label_target"]
        target_loss = self.criterion(logits, labels)
        target_accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        target_f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))
        # # Entropy minimization - calculate and log entropy
        # probs = self.softmax(logits).cpu().numpy()
        # entropy = -np.sum(probs * np.log(probs + 1e-10), axis=1)
        # avg_entropy = np.mean(entropy)
        # self.entropy_values.append(avg_entropy)
     
        # this will log the mean div value across epoch
        self.log(name="source_val/loss", value=source_loss, prog_bar=True, logger=True)
        self.log(name="source_val/accuracy", value=source_accuracy, prog_bar=True, logger=True)
        self.log(name="source_val/f1", value=source_f1, prog_bar=True, logger=True)
        self.log(name="target_val/loss", value=target_loss, prog_bar=True, logger=True)
        self.log(name="target_val/accuracy", value=target_accuracy, prog_bar=True, logger=True)
        self.log(name="target_val/f1", value=target_f1, prog_bar=True, logger=True)
        
        self.validation_outputs.append({
            "source_val/loss": source_loss,
            "source_val/accuracy": source_accuracy,
            "source_val/f1": source_f1,
            "target_val/loss": target_loss,
            "target_val/accuracy": target_accuracy,
            "target_val/f1": target_f1,
            # "features": target_features.cpu(),  # Collect features for t-SNE
            # "logits": logits.cpu(),  # Collect logits for confusion matrix
            # "labels": labels.cpu()  # Collect labels for confusion matrix   
                })
        return {
            "source_val/loss": source_loss,
            "source_val/accuracy": source_accuracy,
            "source_val/f1": source_f1,
            "target_val/loss": target_loss,
            "target_val/accuracy": target_accuracy,
            "target_val/f1": target_f1,
            # "features": target_features.cpu(),  # Collect features for t-SNE
            # "logits": logits.cpu(),  # Collect logits for confusion matrix
            # "labels": labels.cpu()  # Collect labels for confusion matrix   
                            }
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        try:
            outputs= self.validation_outputs
            mean_source_loss = torch.stack([x["source_val/loss"] for x in outputs]).mean()
            mean_source_accuracy = torch.stack([x["source_val/accuracy"] for x in outputs]).mean()
            mean_source_f1 = torch.stack([x["source_val/f1"] for x in outputs]).mean()

            mean_target_loss = torch.stack([x["target_val/loss"] for x in outputs]).mean()
            mean_target_accuracy = torch.stack([x["target_val/accuracy"] for x in outputs]).mean()
            mean_target_f1 = torch.stack([x["target_val/f1"] for x in outputs]).mean()
            print(f"target_val/loss: {mean_target_loss}")
            print(f"target_val/accuracy: {mean_target_accuracy}")
            print(f"target_val/f1: {mean_target_f1}")
            print(f"source_val/loss: {mean_source_loss}")
            print(f"source_val/accuracy: {mean_source_accuracy}")
            print(f"source_val/f1: {mean_source_f1}")

            self.log(name="source_val/loss", value=mean_source_loss, prog_bar=True, logger=True)
            self.log(name="source_val/accuracy", value=mean_source_accuracy, prog_bar=True, logger=True)
            self.log(name="target_val/loss", value=mean_target_loss, prog_bar=True, logger=True)
            self.log(name="target_val/accuracy", value=mean_target_accuracy, prog_bar=True, logger=True)
            self.log(name="target_val/f1", value=mean_target_f1, prog_bar=True, logger=True)
            self.log(name="source_val/f1", value=mean_source_f1, prog_bar=True, logger=True)
        
        
            self.log("val_loss", mean_source_loss)
            # Generate and log visualizations
            # if hasattr(self.trainer, 'current_epoch'):
            #     self.plot_tsne(outputs, epoch=self.trainer.current_epoch, phase='validation')
            #     self.plot_confusion_matrix(outputs, phase='validation')
        except Exception as e:
            print(f"Error during on_validation_epoch_end: {e}")
            raise

    def test_step(self, batch, batch_idx):
        """validation step of DomainTaskAdapter"""
        # get the input ids and attention mask for source data
        input_ids, attention_mask = batch["source_input_ids"], batch["source_attention_mask"]
        logits  = self(input_ids=input_ids, attention_mask=attention_mask)
        labels = batch["label_source"]
        source_loss = self.criterion(logits, labels)
        source_accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        source_f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))

        # get the input ids and attention mask for target data
        input_ids, attention_mask = batch["target_input_ids"], batch["target_attention_mask"]
        logits = self(input_ids=input_ids, attention_mask=attention_mask)
        labels = batch["label_target"]
        target_loss = self.criterion(logits, labels)
        target_accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        target_f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))

       

        self.log(name="source_test/loss", value=source_loss, logger=True)
        self.log(name="source_test/accuracy", value=source_accuracy, logger=True)
        self.log(name="source_test/f1", value=source_f1, logger=True)
        self.log(name="target_test/loss", value=target_loss, logger=True)
        self.log(name="target_test/accuracy", value=target_accuracy, logger=True)
        self.log(name="target_test/f1", value=target_f1, logger=True)
        
        self.test_outputs.append({
            "source_test/loss": source_loss,
            "source_test/accuracy": source_accuracy,
            "source_test/f1": source_f1,
            "target_test/loss": target_loss,
            "target_test/accuracy": target_accuracy,
            "target_test/f1": target_f1,
            # "features": target_features.cpu(),  # Collect features for t-SNE
            # "logits": logits.cpu(),  # Collect logits for confusion matrix
            # "labels": labels.cpu()  # Collect labels for confusion matrix   
        })
        return {
            "source_test/loss": source_loss,
            "source_test/accuracy": source_accuracy,
            "source_test/f1": source_f1,
            "target_test/loss": target_loss,
            "target_test/accuracy": target_accuracy,
            "target_test/f1": target_f1,
            # "features": target_features.cpu(),  # Collect features for t-SNE
            # "logits": logits.cpu(),  # Collect logits for confusion matrix
            # "labels": labels.cpu()  # Collect labels for confusion matrix   
        }
    def on_test_epoch_start(self):
        self.test_outputs = []
    def on_test_epoch_end(self):
        try:
            outputs=  self.test_outputs
            mean_source_loss = torch.stack([x["source_test/loss"] for x in outputs]).mean()
            mean_source_accuracy = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
            mean_source_f1 = torch.stack([x["source_test/f1"] for x in outputs]).mean()

            mean_target_loss = torch.stack([x["target_test/loss"] for x in outputs]).mean()
            mean_target_accuracy = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
            mean_target_f1 = torch.stack([x["target_test/f1"] for x in outputs]).mean()

            self.log(name="source_test/loss", value=mean_source_loss)
            self.log(name="source_test/accuracy", value=mean_source_accuracy)
            self.log(name="source_test/f1", value=mean_source_f1)
            self.log(name="target_test/loss", value=mean_target_loss)
            self.log(name="target_test/accuracy", value=mean_target_accuracy)
            self.log(name="target_test/f1", value=mean_target_f1)

            # # Generate and log visualizations
            # if hasattr(self.trainer, 'current_epoch'):
            #     self.plot_tsne(outputs, epoch=self.trainer.current_epoch, phase='test')
            #     self.plot_confusion_matrix(outputs, phase='test')
        except Exception as e:
            print(f"Error during on_test_epoch_end: {e}")
            raise
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)
    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"])
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, cooldown=0, min_lr=1e-8),
            'monitor': 'val_loss'
        }
        return [optimizer], [lr_scheduler]
    def plot_tsne(self, outputs, epoch, phase):
        try:
            features = []
            labels = []
            for output in outputs:
                features.extend(output["features"].numpy())  # Use target features
                labels.extend(output["labels"].numpy())  # Use target labels

            features = np.array(features)
            labels = np.array(labels)
            print(f"Features shape: {features.shape}")
            print(f"Labels shape: {labels.shape}")

            # Flatten features if necessary
            if features.ndim > 2:
                features = features.reshape(features.shape[0], -1)
                print(f"Flattened features shape: {features.shape}")

            tsne = TSNE(n_components=2)
            tsne_results = tsne.fit_transform(features)
            plt.figure(figsize=(10, 6))
            for i in range(self.hparams["num_classes"]):
                idxs = np.where(labels == i)
                plt.scatter(tsne_results[idxs, 0], tsne_results[idxs, 1], label=f'Class {i}')
            plt.legend()
            plt.title(f't-SNE plot {phase} Epoch {epoch}')
            plt.show()  # Display the plot inline
        except Exception as e:
            print(f"Error during t-SNE plotting: {e}")
            raise

    def plot_confusion_matrix(self, outputs, phase):
        try:
            y_true = []
            y_pred = []
            for output in outputs:
                y_true.extend(output["labels"].numpy())  # Use target labels
                y_pred.extend(torch.argmax(output["logits"], dim=1).numpy())  # Use predicted labels from logits

            y_true = np.array(y_true)
            y_pred = np.array(y_pred)
            print(f"y_true shape: {y_true.shape}")
            print(f"y_pred shape: {y_pred.shape}")

            cm = confusion_matrix(y_true, y_pred)
            disp = ConfusionMatrixDisplay(confusion_matrix=cm)
            disp.plot()
            plt.title(f'Confusion Matrix {phase}')
            plt.show()  # Display the plot inline
        except Exception as e:
            print(f"Error during confusion matrix plotting: {e}")
            raise

In [21]:
# Step 5: Training and Evaluation Loop with Wandb logging
import wandb
wandb.login()
# Wandb setup and training loop
seeds = [42,10,100]  # List of seeds
project_name = 'sda'  # Replace with your wandb project name
domain = 'CBA'  # Replace with the specific domain for this notebook
type = 'invLora'  # Replace with the specific type for this notebook
domain_aprev ='CBA'

# Initialize results dictionary
results = {
    "last_epoch": {
        
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
    },
    "best_model": {
        
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
    },
    "epoch_saved": {
        
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""


In [22]:
try:
    hparams = {
        "source_target": "camera_photo_baby",
        "dataset_cache_dir": "./../../datasets",
        "pretrained_model_name": "bert-base-uncased",
        "padding": True,
        "max_seq_length": 128,
        "bsz": 32,
        "source_domain": "camera_photo",
        "target_domain": "baby",
        "domain_adapter_name": "mlm_inv_baby",
        "task_adapter_name": "task_CBA",
        "pretrained_model_name": "bert-base-uncased",
        "padding": "max_length",
        "max_seq_length": 128,
        "bsz": 32,
        "num_classes": 3,
        "learning_rate": 1e-4,
        "reduction_factor": 16,
        "mode": "domain",
        "saved_adapter_dir": "../../saved/adapters",
    }
    dm = SADataModuleSourceTarget(hparams)
    dm.setup('fit')
    dm.setup("test")
    train_loader = dm.train_dataloader()
    val_loader = dm.val_dataloader()
    print(train_loader)
except Exception as e:
    print(e)




test
Training Dataset: 1350 samples
Validation Dataset: 150 samples
<torch.utils.data.dataloader.DataLoader object at 0x7fb634413100>




In [23]:
for seed in seeds:
    wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "camera_photo_baby",
            "dataset_cache_dir": "../../datasets",
            "pretrained_model_name": "bert-base-uncased",
            "padding": True,
            "source_domain": "camera_photo",
            "target_domain": "baby",
            "domain_adapter_name": "mlm_inv_baby",
            "task_adapter_name": "task_CBA",
            "pretrained_model_name": "bert-base-uncased",
            "padding": "max_length",
            "max_seq_length": 128,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 6  # Save model at the 3rd epoch
        #save_model_callback_epoch = SaveModelAtEpochCallback(save_dir, save_epoch_3)
        # Add a print statement to confirm the callback initialization
        #print(f"Initialized SaveModelAtEpochCallback with save_dir={save_dir} and save_epoch={save_epoch_3}")
        dm = SADataModuleSourceTarget(hparams)
        dm.setup('fit')
        dm.setup("test")

        model = DomainTaskAdapter(hparams)

      
        
    except Exception as e:
        print(f"Error during preprocessing : {e}")   

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        checkpoint_callback = ModelCheckpoint(
            filename="task-CBA-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="target_val/f1",
            mode="max",
        )
        save_model_callback_epoch = ModelCheckpoint(
                # dirpath=checkpoints_path, # <--- specify this on the trainer itself for version control
                filename="CBA-{epoch:02d}",
                every_n_epochs=save_epoch_3,
                save_top_k=-1,  # <--- this is important!
            )
       
        wandb_logger = WandbLogger()
        trainer = Trainer(
            max_epochs=10,
            accelerator="auto",
            default_root_dir="checkpoints",
            # precision=16,
            #logger=wandb_logger,
            callbacks=[checkpoint_callback,save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
            # log_every_n_steps=10,
        )
      
        trainer.fit(model, train_loader, val_loader)
           # After training, print the paths to verify
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        # Print the paths to verify
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")
        
        best_model = DomainTaskAdapter.load_from_checkpoint(best_checkpoint_path)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        # Collect results for best model
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = DomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        # Collect results for 3rd epoch model
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)
        

    except Exception as e:
        print(f"Error during testing: {e}")

    # Finish the wandb run
    wandb.finish()

Seed set to 42


test


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


trainable params: 109484547 || all params: 109484547 || trainable%: 100.0
None
Training Dataset: 1350 samples
Validation Dataset: 150 samples



  | Name      | Type                          | Params
------------------------------------------------------------
0 | model     | BertForSequenceClassification | 109 M 
1 | criterion | CrossEntropyLoss              | 0     
2 | accuracy  | MulticlassAccuracy            | 0     
3 | f1        | MulticlassF1Score             | 0     
4 | softmax   | Softmax                       | 0     
------------------------------------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
437.938   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.9243955016136169
target_val/accuracy: 0.5625
target_val/f1: 0.3597438931465149
source_val/loss: 0.929100513458252
source_val/accuracy: 0.46875
source_val/f1: 0.31884056329727173


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (43) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.46594181656837463
target_val/accuracy: 0.7948863506317139
target_val/f1: 0.7924758791923523
source_val/loss: 0.3305152654647827
source_val/accuracy: 0.8454546332359314
source_val/f1: 0.835788369178772


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.5977581143379211
target_val/accuracy: 0.8198863863945007
target_val/f1: 0.8186748623847961
source_val/loss: 0.3160659968852997
source_val/accuracy: 0.9227272868156433
source_val/f1: 0.9204656481742859


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.8269553184509277
target_val/accuracy: 0.8323864340782166
target_val/f1: 0.8306450247764587
source_val/loss: 0.4884689450263977
source_val/accuracy: 0.9073864221572876
source_val/f1: 0.9053905606269836


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.5958318710327148
target_val/accuracy: 0.8761364221572876
target_val/f1: 0.874014675617218
source_val/loss: 0.3786064088344574
source_val/accuracy: 0.8977273106575012
source_val/f1: 0.8950416445732117


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6169030070304871
target_val/accuracy: 0.8454546332359314
target_val/f1: 0.843137264251709
source_val/loss: 0.4361336827278137
source_val/accuracy: 0.9102272987365723
source_val/f1: 0.9085676074028015


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.5815985798835754
target_val/accuracy: 0.8636364340782166
target_val/f1: 0.8619672656059265
source_val/loss: 0.38413819670677185
source_val/accuracy: 0.9164773225784302
source_val/f1: 0.9134675860404968


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6317969560623169
target_val/accuracy: 0.8573864102363586
target_val/f1: 0.855230450630188
source_val/loss: 0.4180425703525543
source_val/accuracy: 0.9164773225784302
source_val/f1: 0.9134675860404968


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6638080477714539
target_val/accuracy: 0.8573864102363586
target_val/f1: 0.855230450630188
source_val/loss: 0.43939992785453796
source_val/accuracy: 0.9164773225784302
source_val/f1: 0.9134675860404968


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6662865877151489
target_val/accuracy: 0.8573864102363586
target_val/f1: 0.855230450630188
source_val/loss: 0.44113802909851074
source_val/accuracy: 0.9164773225784302
source_val/f1: 0.9134675860404968


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


target_val/loss: 0.6687865257263184
target_val/accuracy: 0.8573864102363586
target_val/f1: 0.855230450630188
source_val/loss: 0.44289979338645935
source_val/accuracy: 0.9164773225784302
source_val/f1: 0.9134675860404968


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/task-CBA-epoch=03-val_loss=0.38.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/CBA-epoch=05.ckpt
test
Test Dataset: 400 samples


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results Last Epoch: [{'source_test/loss': 0.36801064014434814, 'source_test/accuracy': 0.920673131942749, 'source_test/f1': 0.9188576936721802, 'target_test/loss': 0.5227972269058228, 'target_test/accuracy': 0.879807710647583, 'target_test/f1': 0.8721534013748169}]
Best checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/task-CBA-epoch=03-val_loss=0.38.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_3/checkpoints/CBA-epoch=05.ckpt


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 109484547 || all params: 109484547 || trainable%: 100.0
None


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on Best Model: [{'source_test/loss': 0.4059876501560211, 'source_test/accuracy': 0.8966346383094788, 'source_test/f1': 0.8937726020812988, 'target_test/loss': 0.5606012344360352, 'target_test/accuracy': 0.8629807829856873, 'target_test/f1': 0.852799654006958}]


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 109484547 || all params: 109484547 || trainable%: 100.0
None


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on saved epoch: [{'source_test/loss': 0.3304462432861328, 'source_test/accuracy': 0.920673131942749, 'source_test/f1': 0.9186821579933167, 'target_test/loss': 0.4707556664943695, 'target_test/accuracy': 0.8822115659713745, 'target_test/f1': 0.8745548725128174}]


VBox(children=(Label(value='0.002 MB of 0.015 MB uploaded\r'), FloatProgress(value=0.1602244782142632, max=1.0…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112279599991679, max=1.0…

Seed set to 10


test


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


trainable params: 109484547 || all params: 109484547 || trainable%: 100.0
None
Training Dataset: 1350 samples
Validation Dataset: 150 samples



  | Name      | Type                          | Params
------------------------------------------------------------
0 | model     | BertForSequenceClassification | 109 M 
1 | criterion | CrossEntropyLoss              | 0     
2 | accuracy  | MulticlassAccuracy            | 0     
3 | f1        | MulticlassF1Score             | 0     
4 | softmax   | Softmax                       | 0     
------------------------------------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
437.938   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 1.232269287109375
target_val/accuracy: 0.375
target_val/f1: 0.19260549545288086
source_val/loss: 1.1961581707000732
source_val/accuracy: 0.359375
source_val/f1: 0.19601328670978546


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (43) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.4195283055305481
target_val/accuracy: 0.8357954025268555
target_val/f1: 0.8295646905899048
source_val/loss: 0.37020793557167053
source_val/accuracy: 0.8664773106575012
source_val/f1: 0.8617509007453918


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.7490813136100769
target_val/accuracy: 0.7795454859733582
target_val/f1: 0.775056779384613
source_val/loss: 0.6596517562866211
source_val/accuracy: 0.7892045378684998
source_val/f1: 0.7621671557426453


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.45070526003837585
target_val/accuracy: 0.8573864102363586
target_val/f1: 0.8535532355308533
source_val/loss: 0.34762561321258545
source_val/accuracy: 0.9102272987365723
source_val/f1: 0.908132016658783


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.5818936824798584
target_val/accuracy: 0.8323864340782166
target_val/f1: 0.8295613527297974
source_val/loss: 0.31676244735717773
source_val/accuracy: 0.9261364340782166
source_val/f1: 0.9247390031814575


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.8240846991539001
target_val/accuracy: 0.8039773106575012
target_val/f1: 0.8006472587585449
source_val/loss: 0.5922938585281372
source_val/accuracy: 0.8732954263687134
source_val/f1: 0.8684573173522949


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.795753538608551
target_val/accuracy: 0.8227273225784302
target_val/f1: 0.8210240602493286
source_val/loss: 0.5108521580696106
source_val/accuracy: 0.8857954144477844
source_val/f1: 0.8826374411582947


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.8273277282714844
target_val/accuracy: 0.8386363983154297
target_val/f1: 0.8378167152404785
source_val/loss: 0.5767316818237305
source_val/accuracy: 0.9045454263687134
source_val/f1: 0.902640163898468


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.8114590644836426
target_val/accuracy: 0.8477272987365723
target_val/f1: 0.8471541404724121
source_val/loss: 0.5699573159217834
source_val/accuracy: 0.9045454263687134
source_val/f1: 0.902640163898468


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.8139151930809021
target_val/accuracy: 0.8477272987365723
target_val/f1: 0.8471541404724121
source_val/loss: 0.5722736716270447
source_val/accuracy: 0.9045454263687134
source_val/f1: 0.902640163898468


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


target_val/loss: 0.8172559142112732
target_val/accuracy: 0.8477272987365723
target_val/f1: 0.8471541404724121
source_val/loss: 0.5748948454856873
source_val/accuracy: 0.9045454263687134
source_val/f1: 0.902640163898468


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/task-CBA-epoch=02-val_loss=0.35.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/CBA-epoch=05.ckpt
test
Test Dataset: 400 samples


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results Last Epoch: [{'source_test/loss': 0.4896198511123657, 'source_test/accuracy': 0.9134615659713745, 'source_test/f1': 0.9114871621131897, 'target_test/loss': 0.553587019443512, 'target_test/accuracy': 0.889423131942749, 'target_test/f1': 0.8809139728546143}]
Best checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/task-CBA-epoch=02-val_loss=0.35.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_4/checkpoints/CBA-epoch=05.ckpt


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 109484547 || all params: 109484547 || trainable%: 100.0
None


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on Best Model: [{'source_test/loss': 0.3742559552192688, 'source_test/accuracy': 0.884615421295166, 'source_test/f1': 0.8805140852928162, 'target_test/loss': 0.4221184551715851, 'target_test/accuracy': 0.8509615659713745, 'target_test/f1': 0.8387523293495178}]


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 109484547 || all params: 109484547 || trainable%: 100.0
None


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on saved epoch: [{'source_test/loss': 0.4351833462715149, 'source_test/accuracy': 0.9134615659713745, 'source_test/f1': 0.9115228056907654, 'target_test/loss': 0.5582337379455566, 'target_test/accuracy': 0.879807710647583, 'target_test/f1': 0.8719866871833801}]


VBox(children=(Label(value='0.002 MB of 0.015 MB uploaded\r'), FloatProgress(value=0.1602345819144911, max=1.0…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112248600046669, max=1.0…

Seed set to 100


test


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


trainable params: 109484547 || all params: 109484547 || trainable%: 100.0
None
Training Dataset: 1350 samples
Validation Dataset: 150 samples



  | Name      | Type                          | Params
------------------------------------------------------------
0 | model     | BertForSequenceClassification | 109 M 
1 | criterion | CrossEntropyLoss              | 0     
2 | accuracy  | MulticlassAccuracy            | 0     
3 | f1        | MulticlassF1Score             | 0     
4 | softmax   | Softmax                       | 0     
------------------------------------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
437.938   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 1.2295334339141846
target_val/accuracy: 0.046875
target_val/f1: 0.05360623821616173
source_val/loss: 1.2346103191375732
source_val/accuracy: 0.078125
source_val/f1: 0.069444440305233


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (43) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.44451385736465454
target_val/accuracy: 0.8329545855522156
target_val/f1: 0.8309821486473083
source_val/loss: 0.3852054178714752
source_val/accuracy: 0.8761364221572876
source_val/f1: 0.8717161417007446


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.7190023064613342
target_val/accuracy: 0.7886363863945007
target_val/f1: 0.7863054275512695
source_val/loss: 0.4750373959541321
source_val/accuracy: 0.8545454144477844
source_val/f1: 0.8466203808784485


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.5090550184249878
target_val/accuracy: 0.8755682110786438
target_val/f1: 0.8742696046829224
source_val/loss: 0.3818877339363098
source_val/accuracy: 0.8823863863945007
source_val/f1: 0.876808762550354


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.5986126065254211
target_val/accuracy: 0.8573864102363586
target_val/f1: 0.8541242480278015
source_val/loss: 0.4948560893535614
source_val/accuracy: 0.8727273344993591
source_val/f1: 0.8704745173454285


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6441754102706909
target_val/accuracy: 0.8261364102363586
target_val/f1: 0.8252429962158203
source_val/loss: 0.470445841550827
source_val/accuracy: 0.9073864221572876
source_val/f1: 0.9058643579483032


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.9028736352920532
target_val/accuracy: 0.7886363863945007
target_val/f1: 0.7818650603294373
source_val/loss: 0.42960119247436523
source_val/accuracy: 0.9011363983154297
source_val/f1: 0.8980045318603516


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6162554621696472
target_val/accuracy: 0.8505682349205017
target_val/f1: 0.8485054969787598
source_val/loss: 0.3878405690193176
source_val/accuracy: 0.9164773225784302
source_val/f1: 0.9133815169334412


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.664929211139679
target_val/accuracy: 0.8505682349205017
target_val/f1: 0.8485054969787598
source_val/loss: 0.399698406457901
source_val/accuracy: 0.9136363863945007
source_val/f1: 0.9106107950210571


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6945783495903015
target_val/accuracy: 0.8443182110786438
target_val/f1: 0.8421022295951843
source_val/loss: 0.4093645215034485
source_val/accuracy: 0.9136363863945007
source_val/f1: 0.9106107950210571


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


target_val/loss: 0.6969617009162903
target_val/accuracy: 0.8443182110786438
target_val/f1: 0.8421022295951843
source_val/loss: 0.41024547815322876
source_val/accuracy: 0.9136363863945007
source_val/f1: 0.9106107950210571


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Best checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/task-CBA-epoch=02-val_loss=0.38.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/CBA-epoch=05.ckpt
test
Test Dataset: 400 samples


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results Last Epoch: [{'source_test/loss': 0.45826733112335205, 'source_test/accuracy': 0.9062500596046448, 'source_test/f1': 0.9043419361114502, 'target_test/loss': 0.4210061728954315, 'target_test/accuracy': 0.8942307829856873, 'target_test/f1': 0.8892748951911926}]
Best checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/task-CBA-epoch=02-val_loss=0.38.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_5/checkpoints/CBA-epoch=05.ckpt


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 109484547 || all params: 109484547 || trainable%: 100.0
None


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on Best Model: [{'source_test/loss': 0.3510526418685913, 'source_test/accuracy': 0.889423131942749, 'source_test/f1': 0.8871771097183228, 'target_test/loss': 0.2932112216949463, 'target_test/accuracy': 0.8990384936332703, 'target_test/f1': 0.8927214741706848}]


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 109484547 || all params: 109484547 || trainable%: 100.0
None


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on saved epoch: [{'source_test/loss': 0.4349322021007538, 'source_test/accuracy': 0.9038462042808533, 'source_test/f1': 0.9017966389656067, 'target_test/loss': 0.5300893187522888, 'target_test/accuracy': 0.8677884936332703, 'target_test/f1': 0.862343430519104}]


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

In [24]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.36801064014434814, 0.4896198511123657, 0.45826733112335205], 'source_test/accuracy': [0.920673131942749, 0.9134615659713745, 0.9062500596046448], 'source_test/f1': [0.9188576936721802, 0.9114871621131897, 0.9043419361114502], 'target_test/loss': [0.5227972269058228, 0.553587019443512, 0.4210061728954315], 'target_test/accuracy': [0.879807710647583, 0.889423131942749, 0.8942307829856873], 'target_test/f1': [0.8721534013748169, 0.8809139728546143, 0.8892748951911926]}), ('best_model', {'source_test/loss': [0.4059876501560211, 0.3742559552192688, 0.3510526418685913], 'source_test/accuracy': [0.8966346383094788, 0.884615421295166, 0.889423131942749], 'source_test/f1': [0.8937726020812988, 0.8805140852928162, 0.8871771097183228], 'target_test/loss': [0.5606012344360352, 0.4221184551715851, 0.2932112216949463], 'target_test/accuracy': [0.8629807829856873, 0.8509615659713745, 0.8990384936332703], 'target_test/f1': [0.852799654006958, 0.838752

In [25]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
wandb.init(project=project_name, name=f'{domain}_mean_results')
for scenario in mean_results:
    for key, value in mean_results[scenario].items():
        wandb.log({f"{scenario}/{key}": value})
        wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
best_model/source_test/accuracy,▁
best_model/source_test/accuracy_std,▁
best_model/source_test/f1,▁
best_model/source_test/f1_std,▁
best_model/source_test/loss,▁
best_model/source_test/loss_std,▁
best_model/target_test/accuracy,▁
best_model/target_test/accuracy_std,▁
best_model/target_test/f1,▁
best_model/target_test/f1_std,▁

0,1
best_model/source_test/accuracy,0.89022
best_model/source_test/accuracy_std,0.00494
best_model/source_test/f1,0.88715
best_model/source_test/f1_std,0.00541
best_model/source_test/loss,0.3771
best_model/source_test/loss_std,0.02252
best_model/target_test/accuracy,0.87099
best_model/target_test/accuracy_std,0.02043
best_model/target_test/f1,0.86142
best_model/target_test/f1_std,0.02286


Mean Results: {'last_epoch': {'source_test/loss': 0.438632607460022, 'source_test/accuracy': 0.9134615858395895, 'source_test/f1': 0.9115622639656067, 'target_test/loss': 0.49913013974825543, 'target_test/accuracy': 0.8878205418586731, 'target_test/f1': 0.8807807564735413}, 'best_model': {'source_test/loss': 0.37709874908129376, 'source_test/accuracy': 0.8902243971824646, 'source_test/f1': 0.8871545990308126, 'target_test/loss': 0.42531030376752216, 'target_test/accuracy': 0.8709936141967773, 'target_test/f1': 0.8614244858423868}, 'epoch_saved': {'source_test/loss': 0.40018726388613385, 'source_test/accuracy': 0.9126603007316589, 'source_test/f1': 0.9106672008832296, 'target_test/loss': 0.5196929077307383, 'target_test/accuracy': 0.8766025900840759, 'target_test/f1': 0.8696283300717672}}
Standard Deviation Results: {'last_epoch': {'source_test/loss': 0.05155153944075419, 'source_test/accuracy': 0.00588819462528445, 'source_test/f1': 0.005926271149531255, 'target_test/loss': 0.056654028

In [26]:
print('dones')

dones


In [27]:
best_val_loss

inf