In [1]:

import torch
import gc   
torch.cuda.empty_cache()
gc.collect()


0

In [1]:
from datasets import load_dataset

# Load the MNLI dataset
mnli_dataset = load_dataset('multi_nli')

# Initialize a dictionary to hold genre counts
genre_counts = {
    'train': {},
    'validation_matched': {},
    'validation_mismatched': {},
}

# Function to count genres
def count_genres(split):
    genre_dict = {}
    for item in mnli_dataset[split]:
        genre = item['genre']
        if genre in genre_dict:
            genre_dict[genre] += 1
        else:
            genre_dict[genre] = 1
    return genre_dict

# Count genres in each split
genre_counts['train'] = count_genres('train')
genre_counts['validation_matched'] = count_genres('validation_matched')
genre_counts['validation_mismatched'] = count_genres('validation_mismatched')

import pandas as pd

# Create a summary DataFrame
summary_df = pd.DataFrame(genre_counts)

print(summary_df)


              train  validation_matched  validation_mismatched
government  77350.0              1945.0                    NaN
telephone   83348.0              1966.0                    NaN
fiction     77348.0              1973.0                    NaN
travel      77350.0              1976.0                    NaN
slate       77306.0              1955.0                    NaN
letters         NaN                 NaN                 1977.0
verbatim        NaN                 NaN                 1946.0
facetoface      NaN                 NaN                 1974.0
oup             NaN                 NaN                 1961.0
nineeleven      NaN                 NaN                 1974.0


In [11]:
# List of genres in the MNLI dataset
genres = ['fiction', 'government', 'slate', 'telephone', 'travel']

# Initialize dictionaries to hold the DataFrames for each genre
train_dfs = {}
val_dfs = {}
test_dfs = {}

# Function to filter, split, and store DataFrames
def process_genre(genre):
    source_df = pd.DataFrame(dataset['train']).query(f"genre == '{genre}'")
    test_source_df = pd.DataFrame(dataset['validation_matched']).query(f"genre == '{genre}'")
    
    train_source_df, val_source_df = train_test_split(source_df, test_size=0.1, random_state=42, shuffle=True)
    
    train_dfs[genre] = train_source_df
    val_dfs[genre] = val_source_df
    test_dfs[genre] = test_source_df
    
    print(f"Genre: {genre}")
    print(f"Total train examples: {len(train_source_df)}")
    print(f"Total validation examples: {len(val_source_df)}")
    print(f"Total test examples: {len(test_source_df)}\n")

# Process each genre
for genre in genres:
    process_genre(genre)



Genre: fiction
Total train examples: 69613
Total validation examples: 7735
Total test examples: 1973

Genre: government
Total train examples: 69615
Total validation examples: 7735
Total test examples: 1945

Genre: slate
Total train examples: 69575
Total validation examples: 7731
Total test examples: 1955

Genre: telephone
Total train examples: 75013
Total validation examples: 8335
Total test examples: 1966

Genre: travel
Total train examples: 69615
Total validation examples: 7735
Total test examples: 1976



In [None]:
from sklearn.model_selection import train_test_split

# Filter the mnli_dataset for different genres
source_df = pd.DataFrame(mnli_dataset['train']).query(f"genre == 'government'")
target_df = pd.DataFrame(mnli_dataset['train']).query(f"genre == 'telephone'")

test_source_df = pd.DataFrame(mnli_dataset['validation_matched']).query(f"genre == 'government'")
test_target_df = pd.DataFrame(mnli_dataset['validation_matched']).query(f"genre == 'telephone'")

train_source_df, val_source_df = train_test_split(source_df, test_size=0.1, random_state=42,shuffle=True)
train_target_df, val_target_df = train_test_split(target_df, test_size=0.1, random_state=42,shuffle=True)
print(f"prinssst: {train_source_df.iloc[1]['genre']}")
print(f"print: {train_target_df.iloc[1]['genre']}")
print(f"print: {len(source_df)}")
print(f"print: {len(target_df)}")
print(f"print test: {len(test_source_df)}")
print(f"print test: {len(test_target_df)}")

In [None]:
from sklearn.model_selection import train_test_split

# Filter the mnli_dataset for different genres
source_df = pd.DataFrame(mnli_dataset['train']).query(f"genre == 'travel'")
target_df = pd.DataFrame(mnli_dataset['train']).query(f"genre == 'travel'")

test_source_df = pd.DataFrame(mnli_dataset['validation_matched']).query(f"genre == 'travel'")
test_target_df = pd.DataFrame(mnli_dataset['validation_matched']).query(f"genre == 'travel'")

train_source_df, val_source_df = train_test_split(source_df, test_size=0.1, random_state=42,shuffle=True)
train_target_df, val_target_df = train_test_split(target_df, test_size=0.1, random_state=42,shuffle=True)
print(f"prinssst: {train_source_df.iloc[1]['genre']}")
print(f"print: {train_target_df.iloc[1]['genre']}")
print(f"print: {len(source_df)}")
print(f"print: {len(target_df)}")
print(f"print test: {len(test_source_df)}")
print(f"print test: {len(test_target_df)}")

In [6]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig, get_cosine_schedule_with_warmup
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import torchmetrics
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np

install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

dataset = load_from_disk(f"../{config.Config.DATASETS_SAVE_PATH}/datasets")


['/home/guest/Desktop/projects/fourth-expeiments/domain_adaptation_project/continous/fiction', '/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/fourth-experments-OVNdUUAn-py3.8/lib/python3.8/site-packages', '/tmp/tmpbhvrmb2q', '/home/guest/Desktop/projects/fourth-expeiments/domain_adaptation_project/modules']


2024-08-06 21:48:36.492593: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-06 21:48:36.687310: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


./text-files/
./hp-model-


In [3]:
# Step 4: Define the DomainTaskAdapter class



class DomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams):
        super(DomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)
        
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        if self.reduction_factor == "None":
            self.reduction_factor = 16
        self.leave_out = self.hparams.get("leave_out", [])
       
        self.saved_adapter_dir = self.hparams["saved_adapter_dir"]
        self.domain_adapter_name = self.hparams["domain_adapter_name"]
        
        adapter_config = AdapterConfig.load("lora", r=8, alpha=16)
        
        self.task_adapter_name = self.hparams["task_adapter_name"]
        self.model.add_adapter(self.task_adapter_name, config=adapter_config)

        self.model.load_adapter(f"{self.saved_adapter_dir}/{self.domain_adapter_name}", with_head=False)
        self.model.add_classification_head(self.task_adapter_name, num_labels=self.hparams["num_classes"])
        self.model.active_adapters = Stack(self.domain_adapter_name, self.task_adapter_name)

        self.model.train_adapter(Stack(self.domain_adapter_name, self.task_adapter_name))
        print(self.model.adapter_summary())
        print(fn.print_trainable_parameters(self.model))

        self.training_outputs = []
        self.validation_outputs = []
        self.test_outputs = []
        self.criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass',                                           
                                     num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass',num_classes=self.hparams["num_classes"], average="weighted")
        self.softmax = nn.Softmax(dim=1)
        self.entropy_values = []  # For entropy minimization
    def forward(self, input_ids, attention_mask):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.logits

    def training_step(self, batch, batch_idx):
        input_ids, attention_mask = batch["source_input_ids"], batch["source_attention_mask"]
        labels = batch["label_source"]
        logits = self(input_ids=input_ids, attention_mask=attention_mask)
        loss = self.criterion(logits, labels)
        accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))
        # self.training_outputs.append({
        #     "train_loss": loss,
        #     "train_accuracy":accuracy,
        #     "train_f1":f1,
        #     })
        self.log("train_loss", loss)
        self.log("train_accuracy", accuracy)
        self.log("train_f1", f1)
        
        return loss
   
    def validation_step(self, batch, batch_idx):
        """validation step of DomainTaskAdapter"""
        # get the input ids and attention mask for source data
        input_ids, attention_mask = batch["source_input_ids"], batch["source_attention_mask"]
        logits = self(input_ids=input_ids, attention_mask=attention_mask)
        labels = batch["label_source"]
        source_loss = self.criterion(logits, labels)
        source_accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        source_f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))


        # get the input ids and attention mask for target data
        input_ids, attention_mask = batch["target_input_ids"], batch["target_attention_mask"]
        logits  = self(input_ids=input_ids, attention_mask=attention_mask)
        labels = batch["label_target"]
        target_loss = self.criterion(logits, labels)
        target_accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        target_f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))
        # # Entropy minimization - calculate and log entropy
        # probs = self.softmax(logits).cpu().numpy()
        # entropy = -np.sum(probs * np.log(probs + 1e-10), axis=1)
        # avg_entropy = np.mean(entropy)
        # self.entropy_values.append(avg_entropy)
     
        # this will log the mean div value across epoch
        self.log(name="source_val/loss", value=source_loss, prog_bar=True, logger=True)
        self.log(name="source_val/accuracy", value=source_accuracy, prog_bar=True, logger=True)
        self.log(name="source_val/f1", value=source_f1, prog_bar=True, logger=True)
        self.log(name="target_val/loss", value=target_loss, prog_bar=True, logger=True)
        self.log(name="target_val/accuracy", value=target_accuracy, prog_bar=True, logger=True)
        self.log(name="target_val/f1", value=target_f1, prog_bar=True, logger=True)
        
        self.validation_outputs.append({
            "source_val/loss": source_loss,
            "source_val/accuracy": source_accuracy,
            "source_val/f1": source_f1,
            "target_val/loss": target_loss,
            "target_val/accuracy": target_accuracy,
            "target_val/f1": target_f1,
            # "features": target_features.cpu(),  # Collect features for t-SNE
            # "logits": logits.cpu(),  # Collect logits for confusion matrix
            # "labels": labels.cpu()  # Collect labels for confusion matrix   
                })
        return {
            "source_val/loss": source_loss,
            "source_val/accuracy": source_accuracy,
            "source_val/f1": source_f1,
            "target_val/loss": target_loss,
            "target_val/accuracy": target_accuracy,
            "target_val/f1": target_f1,
            # "features": target_features.cpu(),  # Collect features for t-SNE
            # "logits": logits.cpu(),  # Collect logits for confusion matrix
            # "labels": labels.cpu()  # Collect labels for confusion matrix   
                            }
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        try:
            outputs= self.validation_outputs
            mean_source_loss = torch.stack([x["source_val/loss"] for x in outputs]).mean()
            mean_source_accuracy = torch.stack([x["source_val/accuracy"] for x in outputs]).mean()
            mean_source_f1 = torch.stack([x["source_val/f1"] for x in outputs]).mean()

            mean_target_loss = torch.stack([x["target_val/loss"] for x in outputs]).mean()
            mean_target_accuracy = torch.stack([x["target_val/accuracy"] for x in outputs]).mean()
            mean_target_f1 = torch.stack([x["target_val/f1"] for x in outputs]).mean()
            print(f"target_val/loss: {mean_target_loss}")
            print(f"target_val/accuracy: {mean_target_accuracy}")
            print(f"target_val/f1: {mean_target_f1}")
            print(f"source_val/loss: {mean_source_loss}")
            print(f"source_val/accuracy: {mean_source_accuracy}")
            print(f"source_val/f1: {mean_source_f1}")

            self.log(name="source_val/loss", value=mean_source_loss, prog_bar=True, logger=True)
            self.log(name="source_val/accuracy", value=mean_source_accuracy, prog_bar=True, logger=True)
            self.log(name="target_val/loss", value=mean_target_loss, prog_bar=True, logger=True)
            self.log(name="target_val/accuracy", value=mean_target_accuracy, prog_bar=True, logger=True)
            self.log(name="target_val/f1", value=mean_target_f1, prog_bar=True, logger=True)
            self.log(name="source_val/f1", value=mean_source_f1, prog_bar=True, logger=True)
        
        
            self.log("val_loss", mean_source_loss)
            # Generate and log visualizations
            # if hasattr(self.trainer, 'current_epoch'):
            #     self.plot_tsne(outputs, epoch=self.trainer.current_epoch, phase='validation')
            #     self.plot_confusion_matrix(outputs, phase='validation')
        except Exception as e:
            print(f"Error during on_validation_epoch_end: {e}")
            raise

    def test_step(self, batch, batch_idx):
        """validation step of DomainTaskAdapter"""
        # get the input ids and attention mask for source data
        input_ids, attention_mask = batch["source_input_ids"], batch["source_attention_mask"]
        logits  = self(input_ids=input_ids, attention_mask=attention_mask)
        labels = batch["label_source"]
        source_loss = self.criterion(logits, labels)
        source_accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        source_f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))

        # get the input ids and attention mask for target data
        input_ids, attention_mask = batch["target_input_ids"], batch["target_attention_mask"]
        logits = self(input_ids=input_ids, attention_mask=attention_mask)
        labels = batch["label_target"]
        target_loss = self.criterion(logits, labels)
        target_accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        target_f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))

       

        self.log(name="source_test/loss", value=source_loss, logger=True)
        self.log(name="source_test/accuracy", value=source_accuracy, logger=True)
        self.log(name="source_test/f1", value=source_f1, logger=True)
        self.log(name="target_test/loss", value=target_loss, logger=True)
        self.log(name="target_test/accuracy", value=target_accuracy, logger=True)
        self.log(name="target_test/f1", value=target_f1, logger=True)
        
        self.test_outputs.append({
            "source_test/loss": source_loss,
            "source_test/accuracy": source_accuracy,
            "source_test/f1": source_f1,
            "target_test/loss": target_loss,
            "target_test/accuracy": target_accuracy,
            "target_test/f1": target_f1,
            # "features": target_features.cpu(),  # Collect features for t-SNE
            # "logits": logits.cpu(),  # Collect logits for confusion matrix
            # "labels": labels.cpu()  # Collect labels for confusion matrix   
        })
        return {
            "source_test/loss": source_loss,
            "source_test/accuracy": source_accuracy,
            "source_test/f1": source_f1,
            "target_test/loss": target_loss,
            "target_test/accuracy": target_accuracy,
            "target_test/f1": target_f1,
            # "features": target_features.cpu(),  # Collect features for t-SNE
            # "logits": logits.cpu(),  # Collect logits for confusion matrix
            # "labels": labels.cpu()  # Collect labels for confusion matrix   
        }
    def on_test_epoch_start(self):
        self.test_outputs = []
    def on_test_epoch_end(self):
        try:
            outputs=  self.test_outputs
            mean_source_loss = torch.stack([x["source_test/loss"] for x in outputs]).mean()
            mean_source_accuracy = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
            mean_source_f1 = torch.stack([x["source_test/f1"] for x in outputs]).mean()

            mean_target_loss = torch.stack([x["target_test/loss"] for x in outputs]).mean()
            mean_target_accuracy = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
            mean_target_f1 = torch.stack([x["target_test/f1"] for x in outputs]).mean()

            self.log(name="source_test/loss", value=mean_source_loss)
            self.log(name="source_test/accuracy", value=mean_source_accuracy)
            self.log(name="source_test/f1", value=mean_source_f1)
            self.log(name="target_test/loss", value=mean_target_loss)
            self.log(name="target_test/accuracy", value=mean_target_accuracy)
            self.log(name="target_test/f1", value=mean_target_f1)

            # # Generate and log visualizations
            # if hasattr(self.trainer, 'current_epoch'):
            #     self.plot_tsne(outputs, epoch=self.trainer.current_epoch, phase='test')
            #     self.plot_confusion_matrix(outputs, phase='test')
        except Exception as e:
            print(f"Error during on_test_epoch_end: {e}")
            raise
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)
    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"])
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, cooldown=0, min_lr=1e-8),
            'monitor': 'val_loss'
        }
        return [optimizer], [lr_scheduler]
    def plot_tsne(self, outputs, epoch, phase):
        try:
            features = []
            labels = []
            for output in outputs:
                features.extend(output["features"].numpy())  # Use target features
                labels.extend(output["labels"].numpy())  # Use target labels

            features = np.array(features)
            labels = np.array(labels)
            print(f"Features shape: {features.shape}")
            print(f"Labels shape: {labels.shape}")

            # Flatten features if necessary
            if features.ndim > 2:
                features = features.reshape(features.shape[0], -1)
                print(f"Flattened features shape: {features.shape}")

            tsne = TSNE(n_components=2)
            tsne_results = tsne.fit_transform(features)
            plt.figure(figsize=(10, 6))
            for i in range(self.hparams["num_classes"]):
                idxs = np.where(labels == i)
                plt.scatter(tsne_results[idxs, 0], tsne_results[idxs, 1], label=f'Class {i}')
            plt.legend()
            plt.title(f't-SNE plot {phase} Epoch {epoch}')
            plt.show()  # Display the plot inline
        except Exception as e:
            print(f"Error during t-SNE plotting: {e}")
            raise

    def plot_confusion_matrix(self, outputs, phase):
        try:
            y_true = []
            y_pred = []
            for output in outputs:
                y_true.extend(output["labels"].numpy())  # Use target labels
                y_pred.extend(torch.argmax(output["logits"], dim=1).numpy())  # Use predicted labels from logits

            y_true = np.array(y_true)
            y_pred = np.array(y_pred)
            print(f"y_true shape: {y_true.shape}")
            print(f"y_pred shape: {y_pred.shape}")

            cm = confusion_matrix(y_true, y_pred)
            disp = ConfusionMatrixDisplay(confusion_matrix=cm)
            disp.plot()
            plt.title(f'Confusion Matrix {phase}')
            plt.show()  # Display the plot inline
        except Exception as e:
            print(f"Error during confusion matrix plotting: {e}")
            raise

In [4]:
# Step 5: Training and Evaluation Loop with Wandb logging
import wandb
wandb.login()
# Wandb setup and training loop
seeds = [42,10,100]  # List of seeds
project_name = 'final_continous'  # Replace with your wandb project name
domain = 'GF'  # Replace with the specific domain for this notebook
type = 'invLora'  # Replace with the specific type for this notebook
domain_aprev ='GF'

# Initialize results dictionary
results = {
    "last_epoch": {
        
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
    },
    "best_model": {
        
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
    },
    "epoch_saved": {
        
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
for seed in seeds:
    wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "government_fiction",
            "source_domain": "government",
            "target_domain": "fiction",
            "domain_adapter_name": "mlm_inv_F",
            "task_adapter_name": "task_GF",
            "pretrained_model_name": "bert-base-uncased",
            "padding": "max_length",
            "max_seq_length": 128,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 3  # Save model at the 3rd epoch
        #save_model_callback_epoch = SaveModelAtEpochCallback(save_dir, save_epoch_3)
        # Add a print statement to confirm the callback initialization
        #print(f"Initialized SaveModelAtEpochCallback with save_dir={save_dir} and save_epoch={save_epoch_3}")
        dm = processed.DataModuleSourceTarget(hparams)
        dm.setup('fit')
        dm.setup("test")

        model = DomainTaskAdapter(hparams)

        checkpoint_callback = ModelCheckpoint(
            filename="task-GF-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
                # dirpath=checkpoints_path, # <--- specify this on the trainer itself for version control
                filename="GF-{epoch:02d}",
                every_n_epochs=save_epoch_3,
                save_top_k=-1,  # <--- this is important!
            )
       
        wandb_logger = WandbLogger()
        
    except Exception as e:
        print(f"Error during preprocessing : {e}")   

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=5,
            accelerator="auto",
            default_root_dir="checkpoints",
            # precision=16,
            logger=wandb_logger,
            callbacks=[checkpoint_callback,save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
            # log_every_n_GFps=10,
        )
      
        trainer.fit(model, train_loader, val_loader)
           # After training, print the paths to verify
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        # Print the paths to verify
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")
        
        best_model = DomainTaskAdapter.load_from_checkpoint(best_checkpoint_path)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        # Collect results for best model
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = DomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        # Collect results for 3rd epoch model
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)
        

    except Exception as e:
        print(f"Error during testing: {e}")

    # Finish the wandb run
    wandb.finish()

Seed set to 42


prinssst: government
print: fiction
print: 69613
prinssst: government
print: fiction
print: 69613


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
/home/guest/.cache/pypoetry/virtualenvs/fourth-experments-OVNdUUAn-py3.8/lib/python3.8/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiatin

Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_GF                  lora                294,912       0.269       1       1
mlm_inv_F                bottleneck        7,387,776       6.748       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0
trainable params: 8898237 || all params: 118380477 || trainable%: 7.5166422922928415
None



  | Name      | Type               | Params
-------------------------------------------------
0 | model     | BertAdapterModel   | 118 M 
1 | criterion | CrossEntropyLoss   | 0     
2 | accuracy  | MulticlassAccuracy | 0     
3 | f1        | MulticlassF1Score  | 0     
4 | softmax   | Softmax            | 0     
-------------------------------------------------
8.9 M     Trainable params
109 M     Non-trainable params
118 M     Total params
473.522   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 1.1094666719436646
target_val/accuracy: 0.328125
target_val/f1: 0.43775519728660583
source_val/loss: 1.1004948616027832
source_val/accuracy: 0.390625
source_val/f1: 0.5521675944328308


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.66880863904953
target_val/accuracy: 0.7229664325714111
target_val/f1: 0.7231324911117554
source_val/loss: 0.5035450458526611
source_val/accuracy: 0.8012710809707642
source_val/f1: 0.8007263541221619


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6474196314811707
target_val/accuracy: 0.7387205958366394
target_val/f1: 0.7386850714683533
source_val/loss: 0.4854363799095154
source_val/accuracy: 0.8184456825256348
source_val/f1: 0.8183762431144714


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6577655076980591
target_val/accuracy: 0.7460305690765381
target_val/f1: 0.7455891966819763
source_val/loss: 0.4965713322162628
source_val/accuracy: 0.8227069973945618
source_val/f1: 0.8224987983703613


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6980367302894592
target_val/accuracy: 0.7503424882888794
target_val/f1: 0.7501357197761536
source_val/loss: 0.5181871652603149
source_val/accuracy: 0.8217020034790039
source_val/f1: 0.8216084241867065


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


target_val/loss: 0.7549201846122742
target_val/accuracy: 0.7437061667442322
target_val/f1: 0.7443229556083679
source_val/loss: 0.5436117649078369
source_val/accuracy: 0.8254974484443665
source_val/f1: 0.8256782293319702
Best checkpoint path: ./lightning_logs/6he2c4b9/checkpoints/task-GF-epoch=01-val_loss=0.49.ckpt
Saved epoch checkpoint path: ./lightning_logs/6he2c4b9/checkpoints/GF-epoch=02.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


prinssst: government
print: fiction
print: 69613


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results Last Epoch: [{'source_test/loss': 0.5286870002746582, 'source_test/accuracy': 0.8294262290000916, 'source_test/f1': 0.8285898566246033, 'target_test/loss': 0.7382200956344604, 'target_test/accuracy': 0.7503892779350281, 'target_test/f1': 0.7520014643669128}]
Best checkpoint path: ./lightning_logs/6he2c4b9/checkpoints/task-GF-epoch=01-val_loss=0.49.ckpt
Saved epoch checkpoint path: ./lightning_logs/6he2c4b9/checkpoints/GF-epoch=02.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_GF                  lora                294,912       0.269       1       1
mlm_inv_F                bottleneck        7,387,776       6.748       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0
trainable params: 8898237 || all params: 118380477 || trainable%: 7.5166422922928415
None


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on Best Model: [{'source_test/loss': 0.47508561611175537, 'source_test/accuracy': 0.8128073215484619, 'source_test/f1': 0.8120749592781067, 'target_test/loss': 0.6380230188369751, 'target_test/accuracy': 0.7536065578460693, 'target_test/f1': 0.752042829990387}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_GF                  lora                294,912       0.269       1       1
mlm_inv_F                bottleneck        7,387,776       6.748       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0
trainable params: 8898237 || all params: 118380477 || trainable%: 7.5166422922928415
None


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on saved epoch: [{'source_test/loss': 0.48607832193374634, 'source_test/accuracy': 0.8261269927024841, 'source_test/f1': 0.8244238495826721, 'target_test/loss': 0.6505133509635925, 'target_test/accuracy': 0.7519261837005615, 'target_test/f1': 0.7511863708496094}]


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▇▇▇▇▇▇▇█
source_test/accuracy,█▁▇
source_test/f1,█▁▆
source_test/loss,█▁▂
source_val/accuracy,▁▆▇▇█
source_val/f1,▁▆▇▇█
source_val/loss,▃▁▂▅█
target_test/accuracy,▁█▄
target_test/f1,██▁
target_test/loss,█▁▂

0,1
epoch,5.0
source_test/accuracy,0.82613
source_test/f1,0.82442
source_test/loss,0.48608
source_val/accuracy,0.8255
source_val/f1,0.82568
source_val/loss,0.54361
target_test/accuracy,0.75193
target_test/f1,0.75119
target_test/loss,0.65051


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113570644374704, max=1.0…

Seed set to 10


prinssst: government
print: fiction
print: 69613
prinssst: government
print: fiction
print: 69613


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
/home/guest/.cache/pypoetry/virtualenvs/fourth-experments-OVNdUUAn-py3.8/lib/python3.8/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiatin

Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_GF                  lora                294,912       0.269       1       1
mlm_inv_F                bottleneck        7,387,776       6.748       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0
trainable params: 8898237 || all params: 118380477 || trainable%: 7.5166422922928415
None



  | Name      | Type               | Params
-------------------------------------------------
0 | model     | BertAdapterModel   | 118 M 
1 | criterion | CrossEntropyLoss   | 0     
2 | accuracy  | MulticlassAccuracy | 0     
3 | f1        | MulticlassF1Score  | 0     
4 | softmax   | Softmax            | 0     
-------------------------------------------------
8.9 M     Trainable params
109 M     Non-trainable params
118 M     Total params
473.522   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 1.1125516891479492
target_val/accuracy: 0.25
target_val/f1: 0.37524130940437317
source_val/loss: 1.0863802433013916
source_val/accuracy: 0.4375
source_val/f1: 0.5806211233139038


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6699430346488953
target_val/accuracy: 0.7239208817481995
target_val/f1: 0.7242587804794312
source_val/loss: 0.5017491579055786
source_val/accuracy: 0.8031575679779053
source_val/f1: 0.8030961155891418


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6370854377746582
target_val/accuracy: 0.7427236437797546
target_val/f1: 0.7426708340644836
source_val/loss: 0.4901343584060669
source_val/accuracy: 0.8135386109352112
source_val/f1: 0.8132331967353821


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6590683460235596
target_val/accuracy: 0.7487928867340088
target_val/f1: 0.7485981583595276
source_val/loss: 0.49599120020866394
source_val/accuracy: 0.8253682851791382
source_val/f1: 0.8252419829368591


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.7191697955131531
target_val/accuracy: 0.7516338229179382
target_val/f1: 0.7516467571258545
source_val/loss: 0.5427788496017456
source_val/accuracy: 0.8231449127197266
source_val/f1: 0.8225633502006531


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


target_val/loss: 0.7694433927536011
target_val/accuracy: 0.737636923789978
target_val/f1: 0.7375475168228149
source_val/loss: 0.5497022867202759
source_val/accuracy: 0.8222410082817078
source_val/f1: 0.8226991295814514
Best checkpoint path: ./lightning_logs/3i41j7x2/checkpoints/task-GF-epoch=01-val_loss=0.49.ckpt
Saved epoch checkpoint path: ./lightning_logs/3i41j7x2/checkpoints/GF-epoch=02.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


prinssst: government
print: fiction
print: 69613


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results Last Epoch: [{'source_test/loss': 0.5324545502662659, 'source_test/accuracy': 0.8266392946243286, 'source_test/f1': 0.8264757990837097, 'target_test/loss': 0.7587589025497437, 'target_test/accuracy': 0.7407991886138916, 'target_test/f1': 0.7398834824562073}]
Best checkpoint path: ./lightning_logs/3i41j7x2/checkpoints/task-GF-epoch=01-val_loss=0.49.ckpt
Saved epoch checkpoint path: ./lightning_logs/3i41j7x2/checkpoints/GF-epoch=02.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_GF                  lora                294,912       0.269       1       1
mlm_inv_F                bottleneck        7,387,776       6.748       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0
trainable params: 8898237 || all params: 118380477 || trainable%: 7.5166422922928415
None


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on Best Model: [{'source_test/loss': 0.46173930168151855, 'source_test/accuracy': 0.8312499523162842, 'source_test/f1': 0.8305323123931885, 'target_test/loss': 0.6340550780296326, 'target_test/accuracy': 0.7464343905448914, 'target_test/f1': 0.7450035810470581}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_GF                  lora                294,912       0.269       1       1
mlm_inv_F                bottleneck        7,387,776       6.748       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0
trainable params: 8898237 || all params: 118380477 || trainable%: 7.5166422922928415
None


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on saved epoch: [{'source_test/loss': 0.47257333993911743, 'source_test/accuracy': 0.8316188454627991, 'source_test/f1': 0.8306405544281006, 'target_test/loss': 0.6495317220687866, 'target_test/accuracy': 0.7486270666122437, 'target_test/f1': 0.7467801570892334}]


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▇▇▇▇▇▇▇█
source_test/accuracy,▁▇█
source_test/f1,▁██
source_test/loss,█▁▂
source_val/accuracy,▁▄█▇▇
source_val/f1,▁▄█▇▇
source_val/loss,▂▁▂▇█
target_test/accuracy,▁▆█
target_test/f1,▁▆█
target_test/loss,█▁▂

0,1
epoch,5.0
source_test/accuracy,0.83162
source_test/f1,0.83064
source_test/loss,0.47257
source_val/accuracy,0.82224
source_val/f1,0.8227
source_val/loss,0.5497
target_test/accuracy,0.74863
target_test/f1,0.74678
target_test/loss,0.64953


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113375588886103, max=1.0…

Seed set to 100


prinssst: government
print: fiction
print: 69613
prinssst: government
print: fiction
print: 69613


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
/home/guest/.cache/pypoetry/virtualenvs/fourth-experments-OVNdUUAn-py3.8/lib/python3.8/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiatin

Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_GF                  lora                294,912       0.269       1       1
mlm_inv_F                bottleneck        7,387,776       6.748       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0
trainable params: 8898237 || all params: 118380477 || trainable%: 7.5166422922928415
None



  | Name      | Type               | Params
-------------------------------------------------
0 | model     | BertAdapterModel   | 118 M 
1 | criterion | CrossEntropyLoss   | 0     
2 | accuracy  | MulticlassAccuracy | 0     
3 | f1        | MulticlassF1Score  | 0     
4 | softmax   | Softmax            | 0     
-------------------------------------------------
8.9 M     Trainable params
109 M     Non-trainable params
118 M     Total params
473.522   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 1.1073248386383057
target_val/accuracy: 0.265625
target_val/f1: 0.3157169222831726
source_val/loss: 1.0934313535690308
source_val/accuracy: 0.34375
source_val/f1: 0.40217524766921997


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6717937588691711
target_val/accuracy: 0.7198672890663147
target_val/f1: 0.7192354798316956
source_val/loss: 0.5105410814285278
source_val/accuracy: 0.7973465919494629
source_val/f1: 0.7970380187034607


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6300532817840576
target_val/accuracy: 0.7483549118041992
target_val/f1: 0.7480665445327759
source_val/loss: 0.4810294508934021
source_val/accuracy: 0.8193495869636536
source_val/f1: 0.818760871887207


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6713889241218567
target_val/accuracy: 0.7431896328926086
target_val/f1: 0.7435737252235413
source_val/loss: 0.5094327926635742
source_val/accuracy: 0.81482994556427
source_val/f1: 0.8143746256828308


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.7148235440254211
target_val/accuracy: 0.744918942451477
target_val/f1: 0.7448480725288391
source_val/loss: 0.5331845879554749
source_val/accuracy: 0.8227575421333313
source_val/f1: 0.8226426243782043


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


target_val/loss: 0.7853529453277588
target_val/accuracy: 0.7431110739707947
target_val/f1: 0.7431625127792358
source_val/loss: 0.5690063834190369
source_val/accuracy: 0.8241274952888489
source_val/f1: 0.8240711688995361
Best checkpoint path: ./lightning_logs/ijay2xvg/checkpoints/task-GF-epoch=01-val_loss=0.48.ckpt
Saved epoch checkpoint path: ./lightning_logs/ijay2xvg/checkpoints/GF-epoch=02.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


prinssst: government
print: fiction
print: 69613


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results Last Epoch: [{'source_test/loss': 0.5413951873779297, 'source_test/accuracy': 0.8204917311668396, 'source_test/f1': 0.8193000555038452, 'target_test/loss': 0.7753759026527405, 'target_test/accuracy': 0.7541188597679138, 'target_test/f1': 0.7532474398612976}]
Best checkpoint path: ./lightning_logs/ijay2xvg/checkpoints/task-GF-epoch=01-val_loss=0.48.ckpt
Saved epoch checkpoint path: ./lightning_logs/ijay2xvg/checkpoints/GF-epoch=02.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_GF                  lora                294,912       0.269       1       1
mlm_inv_F                bottleneck        7,387,776       6.748       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0
trainable params: 8898237 || all params: 118380477 || trainable%: 7.5166422922928415
None


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on Best Model: [{'source_test/loss': 0.46533915400505066, 'source_test/accuracy': 0.8266392946243286, 'source_test/f1': 0.8249484896659851, 'target_test/loss': 0.6222675442695618, 'target_test/accuracy': 0.7443852424621582, 'target_test/f1': 0.7422133088111877}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
task_GF                  lora                294,912       0.269       1       1
mlm_inv_F                bottleneck        7,387,776       6.748       1       1
--------------------------------------------------------------------------------
Full model                               109,482,240     100.000               0
trainable params: 8898237 || all params: 118380477 || trainable%: 7.5166422922928415
None


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on saved epoch: [{'source_test/loss': 0.4902130365371704, 'source_test/accuracy': 0.8266392946243286, 'source_test/f1': 0.8249428868293762, 'target_test/loss': 0.6584432125091553, 'target_test/accuracy': 0.7455532550811768, 'target_test/f1': 0.7448871731758118}]


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▇▇▇▇▇▇▇█
source_test/accuracy,▁██
source_test/f1,▁██
source_test/loss,█▁▃
source_val/accuracy,▁▇▆██
source_val/f1,▁▇▅██
source_val/loss,▃▁▃▅█
target_test/accuracy,█▁▂
target_test/f1,█▁▃
target_test/loss,█▁▃

0,1
epoch,5.0
source_test/accuracy,0.82664
source_test/f1,0.82494
source_test/loss,0.49021
source_val/accuracy,0.82413
source_val/f1,0.82407
source_val/loss,0.56901
target_test/accuracy,0.74555
target_test/f1,0.74489
target_test/loss,0.65844


In [6]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.5286870002746582, 0.5324545502662659, 0.5413951873779297], 'source_test/accuracy': [0.8294262290000916, 0.8266392946243286, 0.8204917311668396], 'source_test/f1': [0.8285898566246033, 0.8264757990837097, 0.8193000555038452], 'target_test/loss': [0.7382200956344604, 0.7587589025497437, 0.7753759026527405], 'target_test/accuracy': [0.7503892779350281, 0.7407991886138916, 0.7541188597679138], 'target_test/f1': [0.7520014643669128, 0.7398834824562073, 0.7532474398612976]}), ('best_model', {'source_test/loss': [0.47508561611175537, 0.46173930168151855, 0.46533915400505066], 'source_test/accuracy': [0.8128073215484619, 0.8312499523162842, 0.8266392946243286], 'source_test/f1': [0.8120749592781067, 0.8305323123931885, 0.8249484896659851], 'target_test/loss': [0.6380230188369751, 0.6340550780296326, 0.6222675442695618], 'target_test/accuracy': [0.7536065578460693, 0.7464343905448914, 0.7443852424621582], 'target_test/f1': [0.752042829990387, 0

In [7]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
wandb.init(project=project_name, name=f'{domain}_mean_results')
for scenario in mean_results:
    for key, value in mean_results[scenario].items():
        wandb.log({f"{scenario}/{key}": value})
        wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

VBox(children=(Label(value='0.002 MB of 0.004 MB uploaded\r'), FloatProgress(value=0.5484638684552142, max=1.0…

0,1
best_model/source_test/accuracy,▁
best_model/source_test/accuracy_std,▁
best_model/source_test/f1,▁
best_model/source_test/f1_std,▁
best_model/source_test/loss,▁
best_model/source_test/loss_std,▁
best_model/target_test/accuracy,▁
best_model/target_test/accuracy_std,▁
best_model/target_test/f1,▁
best_model/target_test/f1_std,▁

0,1
best_model/source_test/accuracy,0.82357
best_model/source_test/accuracy_std,0.00784
best_model/source_test/f1,0.82252
best_model/source_test/f1_std,0.00773
best_model/source_test/loss,0.46739
best_model/source_test/loss_std,0.00564
best_model/target_test/accuracy,0.74814
best_model/target_test/accuracy_std,0.00395
best_model/target_test/f1,0.74642
best_model/target_test/f1_std,0.00414


Mean Results: {'last_epoch': {'source_test/loss': 0.5341789126396179, 'source_test/accuracy': 0.8255190849304199, 'source_test/f1': 0.8247885704040527, 'target_test/loss': 0.7574516336123148, 'target_test/accuracy': 0.7484357754389445, 'target_test/f1': 0.7483774622281393}, 'best_model': {'source_test/loss': 0.46738802393277484, 'source_test/accuracy': 0.8235655228296915, 'source_test/f1': 0.8225185871124268, 'target_test/loss': 0.6314485470453898, 'target_test/accuracy': 0.7481420636177063, 'target_test/f1': 0.7464199066162109}, 'epoch_saved': {'source_test/loss': 0.4829548994700114, 'source_test/accuracy': 0.8281283775965372, 'source_test/f1': 0.8266690969467163, 'target_test/loss': 0.6528294285138448, 'target_test/accuracy': 0.7487021684646606, 'target_test/f1': 0.7476179003715515}}
Standard Deviation Results: {'last_epoch': {'source_test/loss': 0.0053294511323971035, 'source_test/accuracy': 0.0037325116835576207, 'source_test/f1': 0.003975773021598206, 'target_test/loss': 0.0151969

In [8]:
print('dones')

dones


In [9]:
best_val_loss

inf

: 