In [1]:

import torch
import gc
torch.cuda.empty_cache()
gc.collect()


0

In [2]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Initialize the console

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
import torchmetrics

install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

dataset = load_from_disk(f"../{config.Config.DATASETS_SAVE_PATH}/datasets")


['/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages', '/tmp/tmphuuyjope', '/home/guest/Desktop/projects/third-experiments/domain_adaptation_project/modules']


2024-09-15 18:17:45.508843: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-15 18:17:45.539002: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




./text-files/
./hp-model-


In [3]:
import torch
import os
import pytorch_lightning as pl
from transformers import  AutoConfig, DataCollatorForLanguageModeling
from collections import defaultdict
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import torchmetrics

class JointDomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams,source_dataset_length,target_dataset_length):
        super(JointDomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)

        # Load config with hidden states output
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)

        # Set reduction factor and leave_out layers
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        self.leave_out = self.hparams.get("leave_out", [])
        # if self.leave_out != "None":
        #     self.leave_out = self.leave_out.split(",")
        #     self.leave_out = [int(i) for i in self.leave_out]
        # else:
        #     self.leave_out = []

        # Load MLM adapter with head
        self.model.load_adapter(f"{self.hparams['saved_adapter_dir']}/{self.hparams['domain_adapter_name']}", with_head=True)

        # Add classification head for the task
        self.model.add_classification_head(f"{self.hparams['task_adapter_name']}", num_labels=self.hparams["num_classes"])

        # Set active adapters
        self.model.train_adapter(self.hparams['domain_adapter_name'])
        # Calculate alpha based on dataset lengths
        self.alpha = source_dataset_length / (source_dataset_length + target_dataset_length)

        # Initialize loss functions and metrics
        self.criterion = nn.CrossEntropyLoss()
        self.mlm_criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="weighted")
        self.f1_macro = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="macro")
        self.f1_micro = torchmetrics.F1Score(task='multiclass',num_classes=self.hparams["num_classes"], average="micro")

        self.softmax = nn.Softmax(dim=1)
        self.validation_outputs = []
        self.test_outputs = []
        # Optimizer related variables
        self.learning_rate = self.hparams.get("learning_rate", 1e-4)
        self.scheduler_factor = self.hparams.get("scheduler_factor", 0.1)
        self.scheduler_patience = self.hparams.get("scheduler_patience", 0.05)
        self.scheduler_threshold = self.hparams.get("scheduler_threshold", 0.0001)
        self.scheduler_cooldown = self.hparams.get("scheduler_cooldown", 0)
        self.scheduler_eps = self.hparams.get("scheduler_eps", 1e-8)

    def forward(self, input_ids, attention_mask=None, labels=None, task=None):
        if task == "mlm":
            self.model.active_head= self.hparams['domain_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        elif task == "classification":
            self.model.active_head= self.hparams['task_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        else:
            raise ValueError("Task must be either 'mlm' or 'classification'.")
        return outputs

    def training_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha

        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))

        metrics = {
            "train/accuracy": accuracy,
            "train/f1": f1,
            "train/taskclf_loss": task_loss,
            "train/loss": loss,
            "train/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return loss

    def validation_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha
        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        self.validation_outputs.append({
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
                })
        
        metrics = {
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return metrics
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        outputs= self.validation_outputs

        avg_loss = torch.stack([x["val/loss"] for x in outputs]).mean()
        avg_task_loss = torch.stack([x["val/taskclf_loss"] for x in outputs]).mean()
        avg_mlm_loss = torch.stack([x["val/mlm_loss"] for x in outputs]).mean()
        avg_accuracy = torch.stack([x["val/accuracy"] for x in outputs]).mean()
        avg_f1 = torch.stack([x["val/f1"] for x in outputs]).mean()
        print(f"val/accuracy: {avg_accuracy}")
        print(f"val/f1: {avg_f1}")
        print(f"val/taskclf_loss: {avg_task_loss}")
        print(f"val/loss: {avg_loss}")
        print(f"val/mlm_loss: {avg_mlm_loss}")
        metrics = {
            "val/avg_loss": avg_loss,
            "val/avg_taskclf_loss": avg_task_loss,
            "val/avg_mlm_loss": avg_mlm_loss,
            "val/avg_accuracy": avg_accuracy,
            "val/avg_f1": avg_f1,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
        self.log("val_loss", avg_loss)

    def test_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        target_labels = batch["label_target"]

        # Classification task for source data
        cls_outputs_source = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits_source = cls_outputs_source.logits
        task_loss_source = self.criterion(cls_logits_source, source_labels)

        # Classification task for target data
        cls_outputs_target = self(input_ids=target_input_ids, attention_mask=target_attention_mask, task="classification")
        cls_logits_target = cls_outputs_target.logits
        task_loss_target = self.criterion(cls_logits_target, target_labels)

        # Combine losses (though typically you would evaluate them separately)
        loss = task_loss_source + task_loss_target

        accuracy_source = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_source = self.f1(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_macro_source = self.f1_macro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_micro_source = self.f1_micro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
    
        accuracy_target = self.accuracy(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_target = self.f1(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_macro_target = self.f1_macro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_micro_target = self.f1_micro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))

        metrics = {
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        self.test_outputs.append({
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        })
        return metrics
    def on_test_epoch_start(self):
        self.test_outputs = []

    def on_test_epoch_end(self):
        outputs=  self.test_outputs

        avg_loss_source = torch.stack([x["source_test/loss"] for x in outputs]).mean()
        avg_task_loss_target = torch.stack([x["target_test/loss"] for x in outputs]).mean()
        avg_accuracy_source = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
        avg_f1_source = torch.stack([x["source_test/f1"] for x in outputs]).mean()
        avg_f1_macro_source = torch.stack([x["source_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_source = torch.stack([x["source_test/f1_micro"] for x in outputs]).mean()

        avg_accuracy_target = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
        avg_f1_target = torch.stack([x["target_test/f1"] for x in outputs]).mean()
        avg_f1_macro_target = torch.stack([x["target_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_target = torch.stack([x["target_test/f1_micro"] for x in outputs]).mean()

        metrics = {
            "source_test/loss": avg_loss_source,
            "target_test/loss": avg_task_loss_target,
            "source_test/accuracy": avg_accuracy_source,
            "source_test/f1": avg_f1_source,
            "source_test/f1_macro": avg_f1_macro_source,
            "source_test/f1_micro": avg_f1_micro_source,
            "target_test/accuracy": avg_accuracy_target,
            "target_test/f1": avg_f1_target,
            "target_test/f1_macro": avg_f1_macro_target,
            "target_test/f1_micro": avg_f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"])
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, cooldown=0, min_lr=1e-8),
            'monitor': 'val_loss'
        }
        return [optimizer], [lr_scheduler]


In [4]:
import wandb

wandb.login()
# Wandb setup and training loop
seeds = [42, 10, 100]  # List of seeds
project_name = 'mixed_edited'  # Replace with your wandb project name
domain = 'TEF'  # Replace with the specific domain for this notebook
type = 'unipelt'  # Replace with the specific type for this notebook

# Initialize results dictionary
results = {
    "last_epoch": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "best_model": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "epoch_saved": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""

[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
reload(processed)
for seed in seeds:
    #wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "telephone_fiction",
            "source_domain": "telephone",
            "target_domain": "fiction",
            "domain_adapter_name": "mlm_unipelt_F",
            "task_adapter_name": "TEFPelt",
            "pretrained_model_name": "bert-base-uncased",
            "padding": "max_length",
            "max_seq_length": 128,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 6  # Save model at the 3rd epoch
        
        dm = processed.DataModuleSourceTargetMixed(hparams)
        dm.setup('fit')
        dm.setup("test")
        source_length, target_length = dm.get_dataset_lengths()
        print(f"Source dataset length: {source_length}")
        print(f"Target dataset length: {target_length}")
        model = JointDomainTaskAdapter(hparams,source_length,target_length)

        checkpoint_callback = ModelCheckpoint(
            filename="task-TEFPelt-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
            filename="TEFPelt-{epoch:02d}",
            every_n_epochs=save_epoch_3,
            save_top_k=-1,
        )

        #wandb_logger = WandbLogger()

    except Exception as e:
        print(f"Error during preprocessing : {e}")

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=10,
            accelerator="auto",
            precision=16,
            
            default_root_dir="checkpoints",
            #logger=wandb_logger,
            callbacks=[checkpoint_callback, save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
        )

        trainer.fit(model, train_loader, val_loader)
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")

        best_model = JointDomainTaskAdapter.load_from_checkpoint(best_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = JointDomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)

    except Exception as e:
        print(f"Error during testing: {e}")

    #wandb.finish()

Seed set to 42




Batch size: 32


Filter:   0%|          | 0/392702 [00:00<?, ? examples/s]

Filter:   0%|          | 0/392702 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9815 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9815 [00:00<?, ? examples/s]

Source genre: telephone
Target genre: fiction
Number of target samples: 69613


Map:   0%|          | 0/69613 [00:00<?, ? examples/s]

Map:   0%|          | 0/69613 [00:00<?, ? examples/s]

Map:   0%|          | 0/7735 [00:00<?, ? examples/s]

Map:   0%|          | 0/7735 [00:00<?, ? examples/s]

Filter:   0%|          | 0/392702 [00:00<?, ? examples/s]

Filter:   0%|          | 0/392702 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9815 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9815 [00:00<?, ? examples/s]

Source genre: telephone


Target genre: fiction
Number of target samples: 69613


Map:   0%|          | 0/69613 [00:00<?, ? examples/s]

Map:   0%|          | 0/69613 [00:00<?, ? examples/s]

Map:   0%|          | 0/7735 [00:00<?, ? examples/s]

Map:   0%|          | 0/7735 [00:00<?, ? examples/s]

Source dataset length: 75013
Target dataset length: 15922


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/amp.py:55: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


Missing logger folder: checkpoints/lightning_logs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


eee



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.28125
val/f1: 0.3381343483924866
val/taskclf_loss: 1.0986328125
val/loss: 1.186010479927063
val/mlm_loss: 1.5976710319519043


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.75604248046875
val/f1: 0.7563880085945129
val/taskclf_loss: 0.5969062447547913
val/loss: 0.7708960771560669
val/mlm_loss: 1.5906113386154175


Validation: |                                                                                                 …

val/accuracy: 0.7772349715232849
val/f1: 0.7767946720123291
val/taskclf_loss: 0.5528072118759155
val/loss: 0.7333799600601196
val/mlm_loss: 1.5841083526611328


Validation: |                                                                                                 …

val/accuracy: 0.7889846563339233
val/f1: 0.788966953754425
val/taskclf_loss: 0.5355028510093689
val/loss: 0.7182006239891052
val/mlm_loss: 1.578940749168396


Validation: |                                                                                                 …

val/accuracy: 0.7921854853630066
val/f1: 0.7917566895484924
val/taskclf_loss: 0.5333691239356995
val/loss: 0.7178290486335754
val/mlm_loss: 1.5868713855743408


Validation: |                                                                                                 …

val/accuracy: 0.7972301840782166
val/f1: 0.7971562743186951
val/taskclf_loss: 0.5292502641677856
val/loss: 0.7107207775115967
val/mlm_loss: 1.5656788349151611


Validation: |                                                                                                 …

val/accuracy: 0.7941012382507324
val/f1: 0.7934207916259766
val/taskclf_loss: 0.5368321537971497
val/loss: 0.717678964138031
val/mlm_loss: 1.5696988105773926


Validation: |                                                                                                 …

val/accuracy: 0.7946998476982117
val/f1: 0.7946668863296509
val/taskclf_loss: 0.5504379868507385
val/loss: 0.7253195643424988
val/mlm_loss: 1.549235463142395


Validation: |                                                                                                 …

val/accuracy: 0.7927841544151306
val/f1: 0.7928831577301025
val/taskclf_loss: 0.5684778094291687
val/loss: 0.7393437623977661
val/mlm_loss: 1.5443406105041504


Validation: |                                                                                                 …

val/accuracy: 0.8016443252563477
val/f1: 0.8018682599067688
val/taskclf_loss: 0.5745981335639954
val/loss: 0.7439399361610413
val/mlm_loss: 1.5417563915252686


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.8015245795249939
val/f1: 0.8017160892486572
val/taskclf_loss: 0.5809239149093628
val/loss: 0.7509550452232361
val/mlm_loss: 1.552019715309143


Best checkpoint path: checkpoints/lightning_logs/version_0/checkpoints/task-TEFPelt-epoch=04-val_loss=0.71.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_0/checkpoints/TEFPelt-epoch=05.ckpt


Filter:   0%|          | 0/392702 [00:00<?, ? examples/s]

Filter:   0%|          | 0/392702 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9815 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9815 [00:00<?, ? examples/s]

Source genre: telephone


Target genre: fiction
Number of target samples: 69613


Map:   0%|          | 0/69613 [00:00<?, ? examples/s]

Map:   0%|          | 0/69613 [00:00<?, ? examples/s]

Map:   0%|          | 0/7735 [00:00<?, ? examples/s]

Map:   0%|          | 0/7735 [00:00<?, ? examples/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.5673145651817322, 'source_test/accuracy': 0.8104358315467834, 'source_test/f1': 0.809676468372345, 'source_test/f1_macro': 0.7999328374862671, 'source_test/f1_micro': 0.8104358315467834, 'target_test/loss': 0.6726134419441223, 'target_test/accuracy': 0.7716253995895386, 'target_test/f1': 0.7706831693649292, 'target_test/f1_macro': 0.7633039355278015, 'target_test/f1_micro': 0.7716253995895386}]
Best checkpoint path: checkpoints/lightning_logs/version_0/checkpoints/task-TEFPelt-epoch=04-val_loss=0.71.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_0/checkpoints/TEFPelt-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.5246880054473877, 'source_test/accuracy': 0.7996111512184143, 'source_test/f1': 0.7995675206184387, 'source_test/f1_macro': 0.7899763584136963, 'source_test/f1_micro': 0.7996111512184143, 'target_test/loss': 0.5986841917037964, 'target_test/accuracy': 0.7688412070274353, 'target_test/f1': 0.7681253552436829, 'target_test/f1_macro': 0.759750485420227, 'target_test/f1_micro': 0.7688412070274353}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 10


Test Results on saved epoch: [{'source_test/loss': 0.5300270318984985, 'source_test/accuracy': 0.8039074540138245, 'source_test/f1': 0.8031039834022522, 'source_test/f1_macro': 0.7935399413108826, 'source_test/f1_micro': 0.8039074540138245, 'target_test/loss': 0.6254926919937134, 'target_test/accuracy': 0.7582564949989319, 'target_test/f1': 0.7556814551353455, 'target_test/f1_macro': 0.7513527274131775, 'target_test/f1_micro': 0.7582564949989319}]
Batch size: 32


Source genre: telephone
Target genre: fiction
Number of target samples: 69613


Source genre: telephone
Target genre: fiction
Number of target samples: 69613


Source dataset length: 75013
Target dataset length: 15922


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


eee


Sanity Checking: |                                                                                            …

val/accuracy: 0.34375
val/f1: 0.47486889362335205
val/taskclf_loss: 1.0939102172851562
val/loss: 1.2268431186676025
val/mlm_loss: 1.8531275987625122


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7575989961624146
val/f1: 0.7574685215950012
val/taskclf_loss: 0.6052359342575073
val/loss: 0.7807960510253906
val/mlm_loss: 1.6079086065292358


Validation: |                                                                                                 …

val/accuracy: 0.7791666388511658
val/f1: 0.7789310812950134
val/taskclf_loss: 0.5449081659317017
val/loss: 0.7287437319755554
val/mlm_loss: 1.5948443412780762


Validation: |                                                                                                 …

val/accuracy: 0.7864702939987183
val/f1: 0.7866383194923401
val/taskclf_loss: 0.5376132130622864
val/loss: 0.7191594839096069
val/mlm_loss: 1.574474573135376


Validation: |                                                                                                 …

val/accuracy: 0.792456865310669
val/f1: 0.7920271158218384
val/taskclf_loss: 0.5302413702011108
val/loss: 0.7131691575050354
val/mlm_loss: 1.5749930143356323


Validation: |                                                                                                 …

val/accuracy: 0.7968869805335999
val/f1: 0.7971445918083191
val/taskclf_loss: 0.5299345850944519
val/loss: 0.7123377323150635
val/mlm_loss: 1.5716898441314697


Validation: |                                                                                                 …

val/accuracy: 0.79233717918396
val/f1: 0.7920734286308289
val/taskclf_loss: 0.5312795639038086
val/loss: 0.7137839198112488
val/mlm_loss: 1.573613166809082


Validation: |                                                                                                 …

val/accuracy: 0.7965277433395386
val/f1: 0.7964008450508118
val/taskclf_loss: 0.5447834134101868
val/loss: 0.7236025333404541
val/mlm_loss: 1.566069483757019


Validation: |                                                                                                 …

val/accuracy: 0.7944923043251038
val/f1: 0.7942491769790649
val/taskclf_loss: 0.5601310729980469
val/loss: 0.7342934012413025
val/mlm_loss: 1.5548211336135864


Validation: |                                                                                                 …

val/accuracy: 0.8007343411445618
val/f1: 0.8003336191177368
val/taskclf_loss: 0.5798312425613403
val/loss: 0.7492737770080566
val/mlm_loss: 1.5475653409957886


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.8021711111068726
val/f1: 0.8020086884498596
val/taskclf_loss: 0.5833249092102051
val/loss: 0.7515255212783813
val/mlm_loss: 1.543965458869934


Best checkpoint path: checkpoints/lightning_logs/version_1/checkpoints/task-TEFPelt-epoch=04-val_loss=0.71.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_1/checkpoints/TEFPelt-epoch=05.ckpt


Source genre: telephone
Target genre: fiction
Number of target samples: 69613


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.5541428327560425, 'source_test/accuracy': 0.8164842128753662, 'source_test/f1': 0.8164159655570984, 'source_test/f1_macro': 0.8054585456848145, 'source_test/f1_micro': 0.8164842128753662, 'target_test/loss': 0.6887568831443787, 'target_test/accuracy': 0.7632968425750732, 'target_test/f1': 0.7623804211616516, 'target_test/f1_macro': 0.7562063932418823, 'target_test/f1_micro': 0.7632968425750732}]
Best checkpoint path: checkpoints/lightning_logs/version_1/checkpoints/task-TEFPelt-epoch=04-val_loss=0.71.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_1/checkpoints/TEFPelt-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.5198516249656677, 'source_test/accuracy': 0.8094517588615417, 'source_test/f1': 0.8096822500228882, 'source_test/f1_macro': 0.7997702360153198, 'source_test/f1_micro': 0.8094517588615417, 'target_test/loss': 0.5795468091964722, 'target_test/accuracy': 0.769345223903656, 'target_test/f1': 0.7686545252799988, 'target_test/f1_macro': 0.7611761689186096, 'target_test/f1_micro': 0.769345223903656}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 100


Test Results on saved epoch: [{'source_test/loss': 0.5163310766220093, 'source_test/accuracy': 0.8041474223136902, 'source_test/f1': 0.8036644458770752, 'source_test/f1_macro': 0.7936294078826904, 'source_test/f1_micro': 0.8041474223136902, 'target_test/loss': 0.5921480059623718, 'target_test/accuracy': 0.7713613510131836, 'target_test/f1': 0.7707285284996033, 'target_test/f1_macro': 0.7633663415908813, 'target_test/f1_micro': 0.7713613510131836}]
Batch size: 32


Source genre: telephone
Target genre: fiction
Number of target samples: 69613


Source genre: telephone


Target genre: fiction
Number of target samples: 69613
Source dataset length: 75013
Target dataset length: 15922


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


eee


Sanity Checking: |                                                                                            …

val/accuracy: 0.34375
val/f1: 0.3604166805744171
val/taskclf_loss: 1.097625732421875
val/loss: 1.194129228591919
val/mlm_loss: 1.6487836837768555


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7548291683197021
val/f1: 0.7547774314880371
val/taskclf_loss: 0.5984296202659607
val/loss: 0.7760128378868103
val/mlm_loss: 1.612656831741333


Validation: |                                                                                                 …

val/accuracy: 0.7786877155303955
val/f1: 0.7787474393844604
val/taskclf_loss: 0.5472469925880432
val/loss: 0.7303370237350464
val/mlm_loss: 1.5929254293441772


Validation: |                                                                                                 …

val/accuracy: 0.7861111164093018
val/f1: 0.7861387729644775
val/taskclf_loss: 0.5312509536743164
val/loss: 0.713821291923523
val/mlm_loss: 1.5739612579345703


Validation: |                                                                                                 …

val/accuracy: 0.7889846563339233
val/f1: 0.7886776924133301
val/taskclf_loss: 0.5355662703514099
val/loss: 0.716978907585144
val/mlm_loss: 1.5716643333435059


Validation: |                                                                                                 …

val/accuracy: 0.7912436127662659
val/f1: 0.7910584211349487
val/taskclf_loss: 0.5454221963882446
val/loss: 0.7268549799919128
val/mlm_loss: 1.5816354751586914


Validation: |                                                                                                 …

val/accuracy: 0.7953304648399353
val/f1: 0.7956075072288513
val/taskclf_loss: 0.536375880241394
val/loss: 0.7204622626304626
val/mlm_loss: 1.5877447128295898


Validation: |                                                                                                 …

val/accuracy: 0.8017959594726562
val/f1: 0.802143394947052
val/taskclf_loss: 0.543064296245575
val/loss: 0.7221089005470276
val/mlm_loss: 1.5656381845474243


Validation: |                                                                                                 …

val/accuracy: 0.803232729434967
val/f1: 0.8035693764686584
val/taskclf_loss: 0.5476763844490051
val/loss: 0.723497748374939
val/mlm_loss: 1.5518417358398438


Validation: |                                                                                                 …

val/accuracy: 0.8029932975769043
val/f1: 0.8032795786857605
val/taskclf_loss: 0.5504971146583557
val/loss: 0.7270395159721375
val/mlm_loss: 1.5587800741195679


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.8039511442184448
val/f1: 0.8044180870056152
val/taskclf_loss: 0.5433896780014038
val/loss: 0.7221869230270386
val/mlm_loss: 1.5645508766174316


Best checkpoint path: checkpoints/lightning_logs/version_2/checkpoints/task-TEFPelt-epoch=02-val_loss=0.71.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_2/checkpoints/TEFPelt-epoch=05.ckpt


Source genre: telephone
Target genre: fiction
Number of target samples: 69613


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.5326808094978333, 'source_test/accuracy': 0.807171642780304, 'source_test/f1': 0.806976318359375, 'source_test/f1_macro': 0.7955727577209473, 'source_test/f1_micro': 0.807171642780304, 'target_test/loss': 0.633951723575592, 'target_test/accuracy': 0.7683371901512146, 'target_test/f1': 0.76779705286026, 'target_test/f1_macro': 0.7596005797386169, 'target_test/f1_micro': 0.7683371901512146}]
Best checkpoint path: checkpoints/lightning_logs/version_2/checkpoints/task-TEFPelt-epoch=02-val_loss=0.71.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_2/checkpoints/TEFPelt-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.5258771181106567, 'source_test/accuracy': 0.7907785773277283, 'source_test/f1': 0.790920615196228, 'source_test/f1_macro': 0.7802491784095764, 'source_test/f1_micro': 0.7907785773277283, 'target_test/loss': 0.5855767130851746, 'target_test/accuracy': 0.7595046162605286, 'target_test/f1': 0.7586763501167297, 'target_test/f1_macro': 0.750723123550415, 'target_test/f1_micro': 0.7595046162605286}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on saved epoch: [{'source_test/loss': 0.5318744778633118, 'source_test/accuracy': 0.8013632893562317, 'source_test/f1': 0.8003629446029663, 'source_test/f1_macro': 0.792044997215271, 'source_test/f1_micro': 0.8013632893562317, 'target_test/loss': 0.6036137342453003, 'target_test/accuracy': 0.763032853603363, 'target_test/f1': 0.7617619037628174, 'target_test/f1_macro': 0.7546364068984985, 'target_test/f1_micro': 0.763032853603363}]


In [6]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.5673145651817322, 0.5541428327560425, 0.5326808094978333], 'source_test/accuracy': [0.8104358315467834, 0.8164842128753662, 0.807171642780304], 'source_test/f1': [0.809676468372345, 0.8164159655570984, 0.806976318359375], 'source_test/f1_macro': [0.7999328374862671, 0.8054585456848145, 0.7955727577209473], 'source_test/f1_micro': [0.8104358315467834, 0.8164842128753662, 0.807171642780304], 'target_test/loss': [0.6726134419441223, 0.6887568831443787, 0.633951723575592], 'target_test/accuracy': [0.7716253995895386, 0.7632968425750732, 0.7683371901512146], 'target_test/f1': [0.7706831693649292, 0.7623804211616516, 0.76779705286026], 'target_test/f1_macro': [0.7633039355278015, 0.7562063932418823, 0.7596005797386169], 'target_test/f1_micro': [0.7716253995895386, 0.7632968425750732, 0.7683371901512146]}), ('best_model', {'source_test/loss': [0.5246880054473877, 0.5198516249656677, 0.5258771181106567], 'source_test/accuracy': [0.799611151218

In [7]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
# wandb.init(project=project_name, name=f'{domain}_mean_results')
# for scenario in mean_results:
#     for key, value in mean_results[scenario].items():
#         wandb.log({f"{scenario}/{key}": value})
#         wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
# wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

Mean Results: {'last_epoch': {'source_test/loss': 0.551379402478536, 'source_test/accuracy': 0.8113638957341512, 'source_test/f1': 0.8110229174296061, 'source_test/f1_macro': 0.8003213802973429, 'source_test/f1_micro': 0.8113638957341512, 'target_test/loss': 0.6651073495546976, 'target_test/accuracy': 0.7677531441052755, 'target_test/f1': 0.7669535477956136, 'target_test/f1_macro': 0.7597036361694336, 'target_test/f1_micro': 0.7677531441052755}, 'best_model': {'source_test/loss': 0.523472249507904, 'source_test/accuracy': 0.7999471624692281, 'source_test/f1': 0.8000567952791849, 'source_test/f1_macro': 0.7899985909461975, 'source_test/f1_micro': 0.7999471624692281, 'target_test/loss': 0.5879359046618143, 'target_test/accuracy': 0.7658970157305399, 'target_test/f1': 0.7651520768801371, 'target_test/f1_macro': 0.7572165926297506, 'target_test/f1_micro': 0.7658970157305399}, 'epoch_saved': {'source_test/loss': 0.5260775287946066, 'source_test/accuracy': 0.8031393885612488, 'source_test/f1

In [8]:
print('dones')

dones


In [9]:
best_val_loss

inf