In [2]:
import torch
import gc
torch.cuda.empty_cache()
gc.collect()


0

In [3]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Initialize the console

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
import torchmetrics
import numpy as np
import glob
install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

dataset = load_from_disk(f"../{config.Config.DATASETS_SAVE_PATH}/datasets")


['/home/guest/Desktop/projects/fourth-expeiments/domain_adaptation_project/composition/government', '/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/fourth-experments-OVNdUUAn-py3.8/lib/python3.8/site-packages', '/tmp/tmpvrcvyua9', '/home/guest/Desktop/projects/fourth-expeiments/domain_adaptation_project/modules']


2024-06-30 22:38:55.136141: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-30 22:38:55.203919: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


./text-files/
./hp-model-


In [4]:
# Step 4: Define the DomainTaskAdapter class
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack,Fuse
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
import torchmetrics

class DomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams):
        super(DomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)
        self.saved_adapter_dir = self.hparams["saved_adapter_dir"]
        
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        if self.reduction_factor == "None":
            self.reduction_factor = 16
        self.leave_out = self.hparams.get("leave_out", [])
        #if self.leave_out != "None":
         #   self.leave_out = [int(i) for i in self.leave_out.split(",")]

        
        self.adapter_name = self.hparams['task_fusion_name']
      
        TR = self.model.load_adapter(f"{self.saved_adapter_dir}/mlm_inv_TR", with_head=False)
        S = self.model.load_adapter(f"{self.saved_adapter_dir}/mlm_inv_S", with_head=False)
        F = self.model.load_adapter(f"{self.saved_adapter_dir}/mlm_inv_F", with_head=False)
        G = self.model.load_adapter(f"{self.saved_adapter_dir}/mlm_inv_G", with_head=False)
        TE = self.model.load_adapter(f"{self.saved_adapter_dir}/mlm_inv_TE", with_head=False)
        #print(self.model.adapter_summary())
        self.model.add_adapter_fusion(Fuse(TE,G,S,F,TR))
        self.model.set_active_adapters(Fuse(TE,G,S,F,TR))

        self.model.add_classification_head(self.adapter_name, num_labels=self.hparams["num_classes"])
        adapter_setup = Fuse(TE,G,S,F,TR)
        self.model.train_adapter_fusion(adapter_setup)
        fn.print_trainable_parameters(self.model)
   
        self.validation_outputs = []
        self.test_outputs = []
        self.criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass',                                           
                                     num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass',num_classes=self.hparams["num_classes"], average="weighted")
        self.softmax = nn.Softmax(dim=1)
    def forward(self, input_ids, attention_mask):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.logits

    def training_step(self, batch, batch_idx):
        input_ids, attention_mask = batch["source_input_ids"], batch["source_attention_mask"]
        labels = batch["label_source"]
        logits = self(input_ids=input_ids, attention_mask=attention_mask)
        loss = self.criterion(logits, labels)
        accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))

        self.log("train_loss", loss)
        self.log("train_accuracy", accuracy)
        self.log("train_f1", f1)
        return loss
    def validation_step(self, batch, batch_idx):
        """validation step of DomainTaskAdapter"""
        # get the input ids and attention mask for source data
        input_ids, attention_mask = batch["source_input_ids"], batch["source_attention_mask"]
        logits = self(input_ids=input_ids, attention_mask=attention_mask)
        labels = batch["label_source"]
        source_loss = self.criterion(logits, labels)
        source_accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        source_f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))

        # get the input ids and attention mask for target data
        input_ids, attention_mask = batch["target_input_ids"], batch["target_attention_mask"]
        logits = self(input_ids=input_ids, attention_mask=attention_mask)
        labels = batch["label_target"]
        target_loss = self.criterion(logits, labels)
        target_accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        target_f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))

     
        # this will log the mean div value across epoch
        self.log(name="source_val/loss", value=source_loss, prog_bar=True, logger=True)
        self.log(name="source_val/accuracy", value=source_accuracy, prog_bar=True, logger=True)
        self.log(name="target_val/loss", value=target_loss, prog_bar=True, logger=True)
        self.log(name="target_val/accuracy", value=target_accuracy, prog_bar=True, logger=True)
        self.log(name="target_val/f1", value=target_f1, prog_bar=True, logger=True)
        self.log(name="source_val/f1", value=source_f1, prog_bar=True, logger=True)
        self.validation_outputs.append({
            "source_val/loss": source_loss,
            "source_val/accuracy": source_accuracy,
            "source_val/f1": source_f1,
            "target_val/loss": target_loss,
            "target_val/accuracy": target_accuracy,
            "target_val/f1": target_f1,
            })
        return {
            "source_val/loss": source_loss,
            "source_val/accuracy": source_accuracy,
            "source_val/f1": source_f1,
            "target_val/loss": target_loss,
            "target_val/accuracy": target_accuracy,
            "target_val/f1": target_f1,
        }
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        outputs= self.validation_outputs
        mean_source_loss = torch.stack([x["source_val/loss"] for x in outputs]).mean()
        mean_source_accuracy = torch.stack([x["source_val/accuracy"] for x in outputs]).mean()
        mean_source_f1 = torch.stack([x["source_val/f1"] for x in outputs]).mean()

        mean_target_loss = torch.stack([x["target_val/loss"] for x in outputs]).mean()
        mean_target_accuracy = torch.stack([x["target_val/accuracy"] for x in outputs]).mean()
        mean_target_f1 = torch.stack([x["target_val/f1"] for x in outputs]).mean()
        print(f"target_val/loss: {mean_target_loss}")
        print(f"target_val/accuracy: {mean_target_accuracy}")
        print(f"target_val/f1: {mean_target_accuracy}")
        print(f"source_val/loss: {mean_target_loss}")
        print(f"source_val/accuracy: {mean_target_accuracy}")
        print(f"source_val/f1: {mean_target_accuracy}")
        # this will log the mean div value across epoch
        self.log(name="source_val/loss", value=mean_source_loss, prog_bar=True, logger=True)
        self.log(name="source_val/accuracy", value=mean_source_accuracy, prog_bar=True, logger=True)
        self.log(name="target_val/loss", value=mean_target_loss, prog_bar=True, logger=True)
        self.log(name="target_val/accuracy", value=mean_target_accuracy, prog_bar=True, logger=True)
        self.log(name="target_val/f1", value=mean_target_f1, prog_bar=True, logger=True)
        self.log(name="source_val/f1", value=mean_source_f1, prog_bar=True, logger=True)
                # Log `val_loss` as `mean_source_loss`
        self.log("val_loss", mean_source_loss)

    def test_step(self, batch, batch_idx):
        """validation step of DomainTaskAdapter"""
        # get the input ids and attention mask for source data
        #self.model.active_adapters = Stack("target_inv_mlm_SG", self.adapter_name)
        #print(self.model.adapter_summary())
        input_ids, attention_mask = batch["source_input_ids"], batch["source_attention_mask"]
        logits = self(input_ids=input_ids, attention_mask=attention_mask)
        labels = batch["label_source"]
        source_loss = self.criterion(logits, labels)
        source_accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        source_f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))

        # get the input ids and attention mask for target data
        input_ids, attention_mask = batch["target_input_ids"], batch["target_attention_mask"]
        logits = self(input_ids=input_ids, attention_mask=attention_mask)
        labels = batch["label_target"]
        target_loss = self.criterion(logits, labels)
        target_accuracy = self.accuracy(labels, torch.argmax(self.softmax(logits), dim=1))
        target_f1 = self.f1(labels, torch.argmax(self.softmax(logits), dim=1))

        # this will log the mean div value across epoch
        self.log(name="source_test/loss", value=source_loss)
        self.log(name="source_test/accuracy", value=source_accuracy)
        self.log(name="target_test/loss", value=target_loss)
        self.log(name="target_test/accuracy", value=target_accuracy)
        self.log(name="target_test/f1", value=target_f1)
        self.log(name="source_test/f1", value=source_f1)
        self.test_outputs.append({
            "source_test/loss": source_loss,
            "source_test/accuracy": source_accuracy,
            "source_test/f1": source_f1,
            "target_test/loss": target_loss,
            "target_test/accuracy": target_accuracy,
            "target_test/f1": target_f1,
         })
        # need not to log here (or we can do it but let's log at the end of each epoch)
        return {
            "source_test/loss": source_loss,
            "source_test/accuracy": source_accuracy,
            "source_test/f1": source_f1,
            "target_test/loss": target_loss,
            "target_test/accuracy": target_accuracy,
            "target_test/f1": target_f1,
        }
    def on_test_epoch_start(self):
        self.test_outputs = []
    def on_test_epoch_end(self):
        outputs=  self.test_outputs
        mean_source_loss = torch.stack([x["source_test/loss"] for x in outputs]).mean()
        mean_source_accuracy = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
        mean_source_f1 = torch.stack([x["source_test/f1"] for x in outputs]).mean()

        mean_target_loss = torch.stack([x["target_test/loss"] for x in outputs]).mean()
        mean_target_accuracy = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
        mean_target_f1 = torch.stack([x["target_test/f1"] for x in outputs]).mean()

        # this will log the mean div value across epoch
        self.log(name="source_test/loss", value=mean_source_loss)
        self.log(name="source_test/accuracy", value=mean_source_accuracy)
        self.log(name="target_test/loss", value=mean_target_loss)
        self.log(name="target_test/accuracy", value=mean_target_accuracy)
        self.log(name="target_test/f1", value=mean_target_f1)
        self.log(name="source_test/f1", value=mean_source_f1)
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)
    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"])
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, cooldown=0, min_lr=1e-8),
            'monitor': 'val_loss'
        }
        return [optimizer], [lr_scheduler]


In [5]:
# Step 5: Training and Evaluation Loop with Wandb logging
import wandb
wandb.login()
# Wandb setup and training loop
seeds = [10,100]  # List of seeds
project_name = 'final_composition'  # Replace with your wandb project name
domain = 'TEG'  # Replace with the specific domain for this notebook
type = 'fuse'  # Replace with the specific type for this notebook

# Initialize results dictionary
results = {
    "last_epoch": {
        
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
    },
    "best_model": {
        
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
    },
    "epoch_saved": {
        
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
    },
   
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""
  # Hyperparameters
        

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
for seed in seeds:
    wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "telephone_government",
            "source_domain": "telephone",
            "target_domain": "government",
            "task_fusion_name": "fuse_all_TEG",
            "pretrained_model_name": "bert-base-uncased",
            "padding": "max_length",
            "max_seq_length": 128,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 4   # Save model at the 3rd epoch
        #save_model_callback_epoch = SaveModelAtEpochCallback(save_dir, save_epoch_3)
        # Add a print statement to confirm the callback initialization
        #print(f"Initialized SaveModelAtEpochCallback with save_dir={save_dir} and save_epoch={save_epoch_3}")
        dm = processed.DataModuleSourceTarget(hparams)
        dm.setup('fit')
        dm.setup("test")

        model = DomainTaskAdapter(hparams)

        checkpoint_callback = ModelCheckpoint(
            filename="task-TEG-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
                # dirpath=checkpoints_path, # <--- specify this on the trainer itself for version control
                filename="TEG-{epoch:02d}",
                every_n_epochs=save_epoch_3,
                save_top_k=-1,  # <--- this is important!
            )
       
        wandb_logger = WandbLogger()
        
    except Exception as e:
        print(f"Error during preprocessing : {e}")   

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=6,
            accelerator="auto",
            default_root_dir="checkpoints",
            #precision=16,
            logger=wandb_logger,
            callbacks=[checkpoint_callback,save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
            # log_every_n_steps=10,
        ) 
      
        trainer.fit(model, train_loader, val_loader)
           # After training, print the paths to verify
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        # Print the paths to verify
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")
        
        best_model = DomainTaskAdapter.load_from_checkpoint(best_checkpoint_path)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        # Collect results for best model
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = DomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        # Collect results for 3rd epoch model
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)
        

    except Exception as e:
        print(f"Error during testing: {e}")

    # Finish the wandb run
    wandb.finish()

Seed set to 10


prinssst: telephone
print: government
print: 69615
prinssst: telephone
print: government
print: 69615


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


trainable params: 22467645 || all params: 168888765 || trainable%: 13.303220613875647


/home/guest/.cache/pypoetry/virtualenvs/fourth-experments-OVNdUUAn-py3.8/lib/python3.8/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params
-------------------------------------------------
0 | model     | BertAdapterModel   | 168 M 
1 | criterion | CrossEntropyLoss   | 0     
2 | accuracy  | MulticlassAccuracy | 0     
3 | f1        | MulticlassF1Score  | 0     
4 | softmax   | Softmax            | 0     
-------------------------------------------------
22.5 M    Trainable params
146 M     Non-trainable params
168 M     Total params
675.555   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 1.0965802669525146
target_val/accuracy: 0.390625
target_val/f1: 0.390625
source_val/loss: 1.0965802669525146
source_val/accuracy: 0.390625
source_val/f1: 0.390625


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.654904305934906
target_val/accuracy: 0.7389563918113708
target_val/f1: 0.7389563918113708
source_val/loss: 0.654904305934906
source_val/accuracy: 0.7389563918113708
source_val/f1: 0.7389563918113708


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6069677472114563
target_val/accuracy: 0.7585788369178772
target_val/f1: 0.7585788369178772
source_val/loss: 0.6069677472114563
source_val/accuracy: 0.7585788369178772
source_val/f1: 0.7585788369178772


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6026970744132996
target_val/accuracy: 0.7652150988578796
target_val/f1: 0.7652150988578796
source_val/loss: 0.6026970744132996
source_val/accuracy: 0.7652150988578796
source_val/f1: 0.7652150988578796


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.5885061621665955
target_val/accuracy: 0.7676180601119995
target_val/f1: 0.7676180601119995
source_val/loss: 0.5885061621665955
source_val/accuracy: 0.7676180601119995
source_val/f1: 0.7676180601119995


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.5986643433570862
target_val/accuracy: 0.7705376148223877
target_val/f1: 0.7705376148223877
source_val/loss: 0.5986643433570862
source_val/accuracy: 0.7705376148223877
source_val/f1: 0.7705376148223877


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.5838244557380676
target_val/accuracy: 0.7738670110702515
target_val/f1: 0.7738670110702515
source_val/loss: 0.5838244557380676
source_val/accuracy: 0.7738670110702515
source_val/f1: 0.7738670110702515


`Trainer.fit` stopped: `max_epochs=6` reached.


Best checkpoint path: ./lightning_logs/w58xnq4b/checkpoints/task-TEG-epoch=05-val_loss=0.54.ckpt
Saved epoch checkpoint path: ./lightning_logs/w58xnq4b/checkpoints/TEG-epoch=03.ckpt
prinssst: telephone
print: government
print: 69615


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results Last Epoch: [{'source_test/loss': 0.5812999606132507, 'source_test/accuracy': 0.7819262146949768, 'target_test/loss': 0.5613597631454468, 'target_test/accuracy': 0.7818442583084106, 'target_test/f1': 0.7808178663253784, 'source_test/f1': 0.7815006971359253}]
Best checkpoint path: ./lightning_logs/w58xnq4b/checkpoints/task-TEG-epoch=05-val_loss=0.54.ckpt
Saved epoch checkpoint path: ./lightning_logs/w58xnq4b/checkpoints/TEG-epoch=03.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 22467645 || all params: 168888765 || trainable%: 13.303220613875647


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on Best Model: [{'source_test/loss': 0.5812999606132507, 'source_test/accuracy': 0.7819262146949768, 'target_test/loss': 0.5613597631454468, 'target_test/accuracy': 0.7818442583084106, 'target_test/f1': 0.7808178663253784, 'source_test/f1': 0.7815006971359253}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 22467645 || all params: 168888765 || trainable%: 13.303220613875647


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on saved epoch: [{'source_test/loss': 0.5750565528869629, 'source_test/accuracy': 0.7606351971626282, 'target_test/loss': 0.5638126134872437, 'target_test/accuracy': 0.7779712677001953, 'target_test/f1': 0.7772086262702942, 'source_test/f1': 0.7602396011352539}]


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█
source_test/accuracy,██▁
source_test/f1,██▁
source_test/loss,██▁
source_val/accuracy,▁▄▆▇▇█
source_val/f1,▁▄▆▇▇█
source_val/loss,█▅▃▂▂▁
target_test/accuracy,██▁
target_test/f1,██▁
target_test/loss,▁▁█

0,1
epoch,6.0
source_test/accuracy,0.76064
source_test/f1,0.76024
source_test/loss,0.57506
source_val/accuracy,0.78867
source_val/f1,0.78835
source_val/loss,0.53896
target_test/accuracy,0.77797
target_test/f1,0.77721
target_test/loss,0.56381


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112322566623334, max=1.0…

Seed set to 100


prinssst: telephone
print: government
print: 69615
prinssst: telephone
print: government
print: 69615


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 22467645 || all params: 168888765 || trainable%: 13.303220613875647


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
/home/guest/.cache/pypoetry/virtualenvs/fourth-experments-OVNdUUAn-py3.8/lib/python3.8/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params
-------------------------------------------------
0 | model     | BertAdapterModel   | 168 M 
1 | criterion | CrossEntropyLoss   | 0     
2 | accuracy  | Multic

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 1.090855598449707
target_val/accuracy: 0.375
target_val/f1: 0.375
source_val/loss: 1.090855598449707
source_val/accuracy: 0.375
source_val/f1: 0.375


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6866886019706726
target_val/accuracy: 0.7307648658752441
target_val/f1: 0.7307648658752441
source_val/loss: 0.6866886019706726
source_val/accuracy: 0.7307648658752441
source_val/f1: 0.7307648658752441


In [5]:
for seed in seeds:
    wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "telephone_government",
            "source_domain": "telephone",
            "target_domain": "government",
            "task_fusion_name": "fuse_all_TEG",
            "pretrained_model_name": "bert-base-uncased",
            "padding": "max_length",
            "max_seq_length": 128,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 4   # Save model at the 3rd epoch
        #save_model_callback_epoch = SaveModelAtEpochCallback(save_dir, save_epoch_3)
        # Add a print statement to confirm the callback initialization
        #print(f"Initialized SaveModelAtEpochCallback with save_dir={save_dir} and save_epoch={save_epoch_3}")
        dm = processed.DataModuleSourceTarget(hparams)
        dm.setup('fit')
        dm.setup("test")

        model = DomainTaskAdapter(hparams)

        checkpoint_callback = ModelCheckpoint(
            filename="task-TEG-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
                # dirpath=checkpoints_path, # <--- specify this on the trainer itself for version control
                filename="TEG-{epoch:02d}",
                every_n_epochs=save_epoch_3,
                save_top_k=-1,  # <--- this is important!
            )
       
        wandb_logger = WandbLogger()
        
    except Exception as e:
        print(f"Error during preprocessing : {e}")   

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=6,
            accelerator="auto",
            default_root_dir="checkpoints",
            #precision=16,
            logger=wandb_logger,
            callbacks=[checkpoint_callback,save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
            # log_every_n_steps=10,
        ) 
      
        trainer.fit(model, train_loader, val_loader)
           # After training, print the paths to verify
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        # Print the paths to verify
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")
        
        best_model = DomainTaskAdapter.load_from_checkpoint(best_checkpoint_path)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        # Collect results for best model
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = DomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        # Collect results for 3rd epoch model
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)
        

    except Exception as e:
        print(f"Error during testing: {e}")

    # Finish the wandb run
    wandb.finish()

Seed set to 42


prinssst: telephone
print: government
print: 69615
prinssst: telephone
print: government
print: 69615


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
/home/guest/.cache/pypoetry/virtualenvs/fourth-experments-OVNdUUAn-py3.8/lib/python3.8/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiatin

trainable params: 22467645 || all params: 168888765 || trainable%: 13.303220613875647



  | Name      | Type               | Params
-------------------------------------------------
0 | model     | BertAdapterModel   | 168 M 
1 | criterion | CrossEntropyLoss   | 0     
2 | accuracy  | MulticlassAccuracy | 0     
3 | f1        | MulticlassF1Score  | 0     
4 | softmax   | Softmax            | 0     
-------------------------------------------------
22.5 M    Trainable params
146 M     Non-trainable params
168 M     Total params
675.555   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 1.1245408058166504
target_val/accuracy: 0.265625
target_val/f1: 0.265625
source_val/loss: 1.1245408058166504
source_val/accuracy: 0.265625
source_val/f1: 0.265625


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6570931077003479
target_val/accuracy: 0.7364466786384583
target_val/f1: 0.7364466786384583
source_val/loss: 0.6570931077003479
source_val/accuracy: 0.7364466786384583
source_val/f1: 0.7364466786384583


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.6162487268447876
target_val/accuracy: 0.7526106834411621
target_val/f1: 0.7526106834411621
source_val/loss: 0.6162487268447876
source_val/accuracy: 0.7526106834411621
source_val/f1: 0.7526106834411621


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.5860627889633179
target_val/accuracy: 0.767022967338562
target_val/f1: 0.767022967338562
source_val/loss: 0.5860627889633179
source_val/accuracy: 0.767022967338562
source_val/f1: 0.767022967338562


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.584557831287384
target_val/accuracy: 0.770458996295929
target_val/f1: 0.770458996295929
source_val/loss: 0.584557831287384
source_val/accuracy: 0.770458996295929
source_val/f1: 0.770458996295929


Validation: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 0.5644055604934692
target_val/accuracy: 0.7799866795539856
target_val/f1: 0.7799866795539856
source_val/loss: 0.5644055604934692
source_val/accuracy: 0.7799866795539856
source_val/f1: 0.7799866795539856


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=6` reached.


target_val/loss: 0.5781533718109131
target_val/accuracy: 0.7790827751159668
target_val/f1: 0.7790827751159668
source_val/loss: 0.5781533718109131
source_val/accuracy: 0.7790827751159668
source_val/f1: 0.7790827751159668
Best checkpoint path: ./lightning_logs/fbuz32xb/checkpoints/task-TEG-epoch=04-val_loss=0.54.ckpt
Saved epoch checkpoint path: ./lightning_logs/fbuz32xb/checkpoints/TEG-epoch=03.ckpt
prinssst: telephone
print: government
print: 69615


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results Last Epoch: [{'source_test/loss': 0.5886989831924438, 'source_test/accuracy': 0.7708811163902283, 'target_test/loss': 0.5612685680389404, 'target_test/accuracy': 0.7888728976249695, 'target_test/f1': 0.7891225814819336, 'source_test/f1': 0.7696481943130493}]
Best checkpoint path: ./lightning_logs/fbuz32xb/checkpoints/task-TEG-epoch=04-val_loss=0.54.ckpt
Saved epoch checkpoint path: ./lightning_logs/fbuz32xb/checkpoints/TEG-epoch=03.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 22467645 || all params: 168888765 || trainable%: 13.303220613875647


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on Best Model: [{'source_test/loss': 0.5705001354217529, 'source_test/accuracy': 0.7824385166168213, 'target_test/loss': 0.5596006512641907, 'target_test/accuracy': 0.7860245704650879, 'target_test/f1': 0.785571277141571, 'source_test/f1': 0.7821311950683594}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 22467645 || all params: 168888765 || trainable%: 13.303220613875647


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Results on saved epoch: [{'source_test/loss': 0.5758314728736877, 'source_test/accuracy': 0.7686884999275208, 'target_test/loss': 0.5819535851478577, 'target_test/accuracy': 0.7706557512283325, 'target_test/f1': 0.7700730562210083, 'source_test/f1': 0.7676345109939575}]


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█
source_test/accuracy,▂█▁
source_test/f1,▂█▁
source_test/loss,█▁▃
source_val/accuracy,▁▄▅▇██
source_val/f1,▁▄▅▇██
source_val/loss,█▄▃▂▁▂
target_test/accuracy,█▇▁
target_test/f1,█▇▁
target_test/loss,▂▁█

0,1
epoch,6.0
source_test/accuracy,0.76869
source_test/f1,0.76763
source_test/loss,0.57583
source_val/accuracy,0.78304
source_val/f1,0.78215
source_val/loss,0.5484
target_test/accuracy,0.77066
target_test/f1,0.77007
target_test/loss,0.58195


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111232132219205, max=1.0)…

Seed set to 10


prinssst: telephone
print: government
print: 69615
prinssst: telephone
print: government
print: 69615


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 22467645 || all params: 168888765 || trainable%: 13.303220613875647


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
/home/guest/.cache/pypoetry/virtualenvs/fourth-experments-OVNdUUAn-py3.8/lib/python3.8/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params
-------------------------------------------------
0 | model     | BertAdapterModel   | 168 M 
1 | criterion | CrossEntropyLoss   | 0     
2 | accuracy  | Multic

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

target_val/loss: 1.0965802669525146
target_val/accuracy: 0.390625
target_val/f1: 0.390625
source_val/loss: 1.0965802669525146
source_val/accuracy: 0.390625
source_val/f1: 0.390625


Training: |          | 0/? [00:00<?, ?it/s]

In [None]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.5814250111579895, 0.5800893902778625, 0.5866435170173645], 'source_test/accuracy': [0.7819262146949768, 0.7871925830841064, 0.7735860347747803], 'source_test/f1': [0.7737833261489868, 0.7791577577590942, 0.7653276324272156], 'target_test/loss': [0.5661231875419617, 0.5696138143539429, 0.5633202195167542], 'target_test/accuracy': [0.7904098033905029, 0.7776024341583252, 0.7827253937721252], 'target_test/f1': [0.7852294445037842, 0.7724827527999878, 0.7782156467437744]}), ('best_model', {'source_test/loss': [0.5814250111579895, 0.5634174942970276, 0.5866435170173645], 'source_test/accuracy': [0.7819262146949768, 0.7801024317741394, 0.7735860347747803], 'source_test/f1': [0.7737833261489868, 0.7694737911224365, 0.7653276324272156], 'target_test/loss': [0.5661231875419617, 0.5496447682380676, 0.5633202195167542], 'target_test/accuracy': [0.7904098033905029, 0.7948769927024841, 0.7827253937721252], 'target_test/f1': [0.7852294445037842, 0.7

In [None]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
wandb.init(project=project_name, name=f'{domain}_mean_results')
for scenario in mean_results:
    for key, value in mean_results[scenario].items():
        wandb.log({f"{scenario}/{key}": value})
        wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if best_model:
#     TR = "mlm_inv_TR" 
#     S = "mlm_inv_S" 
#     F = "mlm_inv_F" 
#     G = "mlm_inv_G" 
#     TE = "mlm_inv_TE" 
#     best_model.model.save_adapter_fusion(f"../../saved/adapter_after_run/fusion_layer_{hparams['task_fusion_name']}", Fuse(TE,G,S,F,TR))
#     adapter_save_path = f"../../saved/adapter_after_run/fusion_head_{hparams['task_fusion_name']}"

#     best_model.model.save_head(adapter_save_path, hparams['task_fusion_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
best_model/source_test/accuracy,▁
best_model/source_test/accuracy_std,▁
best_model/source_test/f1,▁
best_model/source_test/f1_std,▁
best_model/source_test/loss,▁
best_model/source_test/loss_std,▁
best_model/target_test/accuracy,▁
best_model/target_test/accuracy_std,▁
best_model/target_test/f1,▁
best_model/target_test/f1_std,▁

0,1
best_model/source_test/accuracy,0.77854
best_model/source_test/accuracy_std,0.00358
best_model/source_test/f1,0.76953
best_model/source_test/f1_std,0.00345
best_model/source_test/loss,0.57716
best_model/source_test/loss_std,0.00995
best_model/target_test/accuracy,0.78934
best_model/target_test/accuracy_std,0.00502
best_model/target_test/f1,0.78422
best_model/target_test/f1_std,0.00455


Mean Results: {'last_epoch': {'source_test/loss': 0.5827193061510721, 'source_test/accuracy': 0.7809016108512878, 'source_test/f1': 0.7727562387784322, 'target_test/loss': 0.5663524071375529, 'target_test/accuracy': 0.7835792104403178, 'target_test/f1': 0.7786426146825155}, 'best_model': {'source_test/loss': 0.5771620074907938, 'source_test/accuracy': 0.7785382270812988, 'source_test/f1': 0.7695282498995463, 'target_test/loss': 0.5596960584322611, 'target_test/accuracy': 0.7893373966217041, 'target_test/f1': 0.7842217683792114}, 'epoch_saved': {'source_test/loss': 0.5810579657554626, 'source_test/accuracy': 0.7688046296437582, 'source_test/f1': 0.7589112718900045, 'target_test/loss': 0.5613650878270467, 'target_test/accuracy': 0.7789958715438843, 'target_test/f1': 0.7733606100082397}}
Standard Deviation Results: {'last_epoch': {'source_test/loss': 0.0028279019285450007, 'source_test/accuracy': 0.0056018984282642, 'source_test/f1': 0.005692642797150989, 'target_test/loss': 0.00257445660