In [1]:

import torch
import gc
torch.cuda.empty_cache()
gc.collect()


0

In [2]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Initialize the console

# Step 2: Import necessary libraries
from typing import Optional, Dict, Any
import os
import torch
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from rich.traceback import install
from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn
import torch.optim as optim
from transformers import AutoConfig
from adapters import AutoAdapterModel, AdapterConfig
from adapters.composition import Stack
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
import torchmetrics

install(show_locals=True)

from setup import setup_src_path
print(setup_src_path())
import data.processed as processed
import config.config as config
import utils.setup as setup
import utils.functions as fn
from importlib import reload

from datasets import load_from_disk

print(config.Config.TXT_SAVE_PATH)
print(config.Config.MODEL_SAVE_PATH)

dataset = load_from_disk(f"../{config.Config.DATASETS_SAVE_PATH}/datasets")


['/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages', '/tmp/tmp8vjt9vej', '/home/guest/Desktop/projects/third-experiments/domain_adaptation_project/modules']


2024-09-26 09:02:22.149105: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-26 09:02:22.217400: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




./text-files/
./hp-model-


In [3]:
import torch
import os
import pytorch_lightning as pl
from transformers import  AutoConfig, DataCollatorForLanguageModeling
from collections import defaultdict
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import torchmetrics

class JointDomainTaskAdapter(pl.LightningModule):
    def __init__(self, hparams,source_dataset_length,target_dataset_length):
        super(JointDomainTaskAdapter, self).__init__()
        self.save_hyperparameters(hparams)

        # Load config with hidden states output
        self.config = AutoConfig.from_pretrained(self.hparams["pretrained_model_name"])
        self.config.output_hidden_states = True
        self.model = AutoAdapterModel.from_pretrained(self.hparams["pretrained_model_name"], config=self.config)

        # Set reduction factor and leave_out layers
        self.reduction_factor = self.hparams.get("reduction_factor", 16)
        self.leave_out = self.hparams.get("leave_out", [])
        # if self.leave_out != "None":
        #     self.leave_out = self.leave_out.split(",")
        #     self.leave_out = [int(i) for i in self.leave_out]
        # else:
        #     self.leave_out = []

        # Load MLM adapter with head
        self.model.load_adapter(f"{self.hparams['saved_adapter_dir']}/{self.hparams['domain_adapter_name']}", with_head=True)

        # Add classification head for the task
        self.model.add_classification_head(f"{self.hparams['task_adapter_name']}", num_labels=self.hparams["num_classes"])

        # Set active adapters
        self.model.train_adapter(self.hparams['domain_adapter_name'])
        # Calculate alpha based on dataset lengths
        self.alpha = source_dataset_length / (source_dataset_length + target_dataset_length)

        # Initialize loss functions and metrics
        self.criterion = nn.CrossEntropyLoss()
        self.mlm_criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=self.hparams["num_classes"])
        self.f1 = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="weighted")
        self.f1_macro = torchmetrics.F1Score(task='multiclass', num_classes=self.hparams["num_classes"], average="macro")
        self.f1_micro = torchmetrics.F1Score(task='multiclass',num_classes=self.hparams["num_classes"], average="micro")

        self.softmax = nn.Softmax(dim=1)
        self.validation_outputs = []
        self.test_outputs = []
        # Optimizer related variables
        self.learning_rate = self.hparams.get("learning_rate", 1e-4)
        self.scheduler_factor = self.hparams.get("scheduler_factor", 0.1)
        self.scheduler_patience = self.hparams.get("scheduler_patience", 0.05)
        self.scheduler_threshold = self.hparams.get("scheduler_threshold", 0.0001)
        self.scheduler_cooldown = self.hparams.get("scheduler_cooldown", 0)
        self.scheduler_eps = self.hparams.get("scheduler_eps", 1e-8)

    def forward(self, input_ids, attention_mask=None, labels=None, task=None):
        if task == "mlm":
            self.model.active_head= self.hparams['domain_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        elif task == "classification":
            self.model.active_head= self.hparams['task_adapter_name']
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        else:
            raise ValueError("Task must be either 'mlm' or 'classification'.")
        return outputs

    def training_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha

        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))

        metrics = {
            "train/accuracy": accuracy,
            "train/f1": f1,
            "train/taskclf_loss": task_loss,
            "train/loss": loss,
            "train/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return loss

    def validation_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        mlm_labels = batch["mlm_labels"]

        # Calculate dynamic alpha based on the lengths of source and target data
        alpha = self.alpha
        # Classification task
        cls_outputs = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits = cls_outputs.logits
        task_loss = self.criterion(cls_logits, source_labels)

        # MLM task
        mlm_outputs = self(input_ids=target_input_ids, attention_mask=target_attention_mask, labels=mlm_labels, task="mlm")
        mlm_loss = mlm_outputs.loss

        # Combine losses
        loss = alpha * task_loss + (1 - alpha) * mlm_loss

        accuracy = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        f1 = self.f1(source_labels, torch.argmax(self.softmax(cls_logits), dim=1))
        self.validation_outputs.append({
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
                })
        
        metrics = {
            "val/accuracy": accuracy,
            "val/f1": f1,
            "val/taskclf_loss": task_loss,
            "val/loss": loss,
            "val/mlm_loss": mlm_loss
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        return metrics
    def on_validation_epoch_start(self):
        self.validation_outputs = []
    
    def on_validation_epoch_end(self):
        outputs= self.validation_outputs

        avg_loss = torch.stack([x["val/loss"] for x in outputs]).mean()
        avg_task_loss = torch.stack([x["val/taskclf_loss"] for x in outputs]).mean()
        avg_mlm_loss = torch.stack([x["val/mlm_loss"] for x in outputs]).mean()
        avg_accuracy = torch.stack([x["val/accuracy"] for x in outputs]).mean()
        avg_f1 = torch.stack([x["val/f1"] for x in outputs]).mean()
        print(f"val/accuracy: {avg_accuracy}")
        print(f"val/f1: {avg_f1}")
        print(f"val/taskclf_loss: {avg_task_loss}")
        print(f"val/loss: {avg_loss}")
        print(f"val/mlm_loss: {avg_mlm_loss}")
        metrics = {
            "val/avg_loss": avg_loss,
            "val/avg_taskclf_loss": avg_task_loss,
            "val/avg_mlm_loss": avg_mlm_loss,
            "val/avg_accuracy": avg_accuracy,
            "val/avg_f1": avg_f1,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
        self.log("val_loss", avg_loss)

    def test_step(self, batch, batch_idx):
        # Separate source and target data processing
        source_input_ids = batch["source_input_ids"]
        source_attention_mask = batch["source_attention_mask"]
        source_labels = batch["label_source"]

        target_input_ids = batch["target_input_ids"]
        target_attention_mask = batch["target_attention_mask"]
        target_labels = batch["label_target"]

        # Classification task for source data
        cls_outputs_source = self(input_ids=source_input_ids, attention_mask=source_attention_mask, task="classification")
        cls_logits_source = cls_outputs_source.logits
        task_loss_source = self.criterion(cls_logits_source, source_labels)

        # Classification task for target data
        cls_outputs_target = self(input_ids=target_input_ids, attention_mask=target_attention_mask, task="classification")
        cls_logits_target = cls_outputs_target.logits
        task_loss_target = self.criterion(cls_logits_target, target_labels)

        # Combine losses (though typically you would evaluate them separately)
        loss = task_loss_source + task_loss_target

        accuracy_source = self.accuracy(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_source = self.f1(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_macro_source = self.f1_macro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
        f1_micro_source = self.f1_micro(source_labels, torch.argmax(self.softmax(cls_logits_source), dim=1))
    
        accuracy_target = self.accuracy(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_target = self.f1(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_macro_target = self.f1_macro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))
        f1_micro_target = self.f1_micro(target_labels, torch.argmax(self.softmax(cls_logits_target), dim=1))

        metrics = {
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)

        self.test_outputs.append({
            "source_test/loss": task_loss_source,
            "source_test/accuracy": accuracy_source,
            "source_test/f1": f1_source,
            "source_test/f1_macro": f1_macro_source,
            "source_test/f1_micro": f1_micro_source,
            "target_test/loss": task_loss_target,
            "target_test/accuracy": accuracy_target,
            "target_test/f1": f1_target,
            "target_test/f1_macro": f1_macro_target,
            "target_test/f1_micro": f1_micro_target,
        })
        return metrics
    def on_test_epoch_start(self):
        self.test_outputs = []

    def on_test_epoch_end(self):
        outputs=  self.test_outputs

        avg_loss_source = torch.stack([x["source_test/loss"] for x in outputs]).mean()
        avg_task_loss_target = torch.stack([x["target_test/loss"] for x in outputs]).mean()
        avg_accuracy_source = torch.stack([x["source_test/accuracy"] for x in outputs]).mean()
        avg_f1_source = torch.stack([x["source_test/f1"] for x in outputs]).mean()
        avg_f1_macro_source = torch.stack([x["source_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_source = torch.stack([x["source_test/f1_micro"] for x in outputs]).mean()

        avg_accuracy_target = torch.stack([x["target_test/accuracy"] for x in outputs]).mean()
        avg_f1_target = torch.stack([x["target_test/f1"] for x in outputs]).mean()
        avg_f1_macro_target = torch.stack([x["target_test/f1_macro"] for x in outputs]).mean()
        avg_f1_micro_target = torch.stack([x["target_test/f1_micro"] for x in outputs]).mean()

        metrics = {
            "source_test/loss": avg_loss_source,
            "target_test/loss": avg_task_loss_target,
            "source_test/accuracy": avg_accuracy_source,
            "source_test/f1": avg_f1_source,
            "source_test/f1_macro": avg_f1_macro_source,
            "source_test/f1_micro": avg_f1_micro_source,
            "target_test/accuracy": avg_accuracy_target,
            "target_test/f1": avg_f1_target,
            "target_test/f1_macro": avg_f1_macro_target,
            "target_test/f1_micro": avg_f1_micro_target,
        }

        for key, val in metrics.items():
            self.log(name=key, value=val)
    def save_adapter(self, location, adapter_name):
        self.model.save_adapter(location, adapter_name)

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"])
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=0.0001, cooldown=0, min_lr=1e-8),
            'monitor': 'val_loss'
        }
        return [optimizer], [lr_scheduler]


In [4]:
import wandb

wandb.login()
# Wandb setup and training loop
seeds = [42, 10, 100]  # List of seeds
project_name = 'mixed_edited'  # Replace with your wandb project name
domain = 'GTE'  # Replace with the specific domain for this notebook
type = 'unipelt'  # Replace with the specific type for this notebook

# Initialize results dictionary
results = {
    "last_epoch": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "best_model": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    },
    "epoch_saved": {
        "source_test/loss": [],
        "source_test/accuracy": [],
        "source_test/f1": [],
        "source_test/f1_macro": [],
        "source_test/f1_micro": [],
        "target_test/loss": [],
        "target_test/accuracy": [],
        "target_test/f1": [],
        "target_test/f1_macro": [],
        "target_test/f1_micro": [],
    }
}

best_val_loss = float('inf')
best_model = None
best_model_path = ""

[34m[1mwandb[0m: Currently logged in as: [33mmrawhani5[0m ([33mmrawhani[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
reload(processed)
for seed in seeds:
    #wandb.init(project=project_name, name=f'{domain}_{type}_run_with_seed_{seed}', config={'seed': seed})

    try:
        seed_everything(seed)

        hparams = {
            "source_target": "government_telephone",
            "source_domain": "government",
            "target_domain": "telephone",
            "domain_adapter_name": "mlm_unipelt_TE",
            "task_adapter_name": "GTEPelt",
            "pretrained_model_name": "bert-base-uncased",
            "padding": "max_length",
            "max_seq_length": 128,
            "bsz": 32,
            "num_classes": 3,
            "learning_rate": 1e-4,
            "reduction_factor": 16,
            "mode": "domain",
            "saved_adapter_dir": "../../saved/adapters",
        }

        save_dir = "checkpoints"
        save_epoch_3 = 6  # Save model at the 3rd epoch
        
        dm = processed.DataModuleSourceTargetMixed(hparams)
        dm.setup('fit')
        dm.setup("test")
        source_length, target_length = dm.get_dataset_lengths()
        print(f"Source dataset length: {source_length}")
        print(f"Target dataset length: {target_length}")
        model = JointDomainTaskAdapter(hparams,source_length,target_length)

        checkpoint_callback = ModelCheckpoint(
            filename="task-GTEPelt-{epoch:02d}-{val_loss:.2f}",
            save_top_k=1,
            monitor="val_loss",
            mode="min",
        )
        save_model_callback_epoch = ModelCheckpoint(
            filename="GTEPelt-{epoch:02d}",
            every_n_epochs=save_epoch_3,
            save_top_k=-1,
        )

        #wandb_logger = WandbLogger()

    except Exception as e:
        print(f"Error during preprocessing : {e}")

    try:
        train_loader = dm.train_dataloader()
        val_loader = dm.val_dataloader()
        trainer = Trainer(
            max_epochs=10,
            accelerator="auto",
            precision=16,
            
            default_root_dir="checkpoints",
            #logger=wandb_logger,
            callbacks=[checkpoint_callback, save_model_callback_epoch],
            limit_train_batches=1.0,
            limit_val_batches=1.0,
            limit_test_batches=1.0,
        )

        trainer.fit(model, train_loader, val_loader)
        print(f"Best checkpoint path: {checkpoint_callback.best_model_path}")
        print(f"Saved epoch checkpoint path: {save_model_callback_epoch.best_model_path}")
    except Exception as e:
        print(f"Error during training : {e}")

    try:
        dm.setup("test")
        test_loader = dm.test_dataloader()
        test_results_last = trainer.test(model, test_loader)
        print("Test Results Last Epoch:", test_results_last)

        # Collect results for last epoch model
        for key, value in test_results_last[0].items():
            results["last_epoch"][key].append(value)

        # Paths to the saved checkpoints
        best_checkpoint_path = checkpoint_callback.best_model_path
        saved_epoch_checkpoint_path = save_model_callback_epoch.best_model_path
        print(f"Best checkpoint path: {best_checkpoint_path}")
        print(f"Saved epoch checkpoint path: {saved_epoch_checkpoint_path}")

        best_model = JointDomainTaskAdapter.load_from_checkpoint(best_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_best = trainer.test(best_model, test_loader)
        print("Test Results on Best Model:", test_results_best)
        for key, value in test_results_best[0].items():
            results["best_model"][key].append(value)

        saved_epoch_model = JointDomainTaskAdapter.load_from_checkpoint(saved_epoch_checkpoint_path,source_dataset_length=source_length, target_dataset_length=target_length)
        test_results_saved_epoch = trainer.test(saved_epoch_model, test_loader)
        print("Test Results on saved epoch:", test_results_saved_epoch)
        for key, value in test_results_saved_epoch[0].items():
            results["epoch_saved"][key].append(value)

    except Exception as e:
        print(f"Error during testing: {e}")

    #wandb.finish()

Seed set to 42




Batch size: 32


Filter:   0%|          | 0/392702 [00:00<?, ? examples/s]

Filter:   0%|          | 0/392702 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9815 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9815 [00:00<?, ? examples/s]

Source genre: government
Target genre: telephone
Number of target samples: 75013


Map:   0%|          | 0/75013 [00:00<?, ? examples/s]

Map:   0%|          | 0/75013 [00:00<?, ? examples/s]

Map:   0%|          | 0/8335 [00:00<?, ? examples/s]

Map:   0%|          | 0/8335 [00:00<?, ? examples/s]

Filter:   0%|          | 0/392702 [00:00<?, ? examples/s]

Filter:   0%|          | 0/392702 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9815 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9815 [00:00<?, ? examples/s]

Source genre: government


Target genre: telephone
Number of target samples: 75013


Map:   0%|          | 0/75013 [00:00<?, ? examples/s]

Map:   0%|          | 0/75013 [00:00<?, ? examples/s]

Map:   0%|          | 0/8335 [00:00<?, ? examples/s]

Map:   0%|          | 0/8335 [00:00<?, ? examples/s]

Source dataset length: 69615
Target dataset length: 24796


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  state_dict = torch.load(weights_file, map_location="cpu")


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/amp.py:55: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


eee



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

val/accuracy: 0.359375
val/f1: 0.4054788053035736
val/taskclf_loss: 1.0985183715820312
val/loss: 1.3009107112884521
val/mlm_loss: 1.869128942489624


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.783035397529602
val/f1: 0.7833187580108643
val/taskclf_loss: 0.5471510291099548
val/loss: 0.868341326713562
val/mlm_loss: 1.7700860500335693


Validation: |                                                                                                 …

val/accuracy: 0.8021469116210938
val/f1: 0.8021782040596008
val/taskclf_loss: 0.5173241496086121
val/loss: 0.8425845503807068
val/mlm_loss: 1.7557562589645386


Validation: |                                                                                                 …

val/accuracy: 0.8086035251617432
val/f1: 0.8081539869308472
val/taskclf_loss: 0.5052411556243896
val/loss: 0.8340374827384949
val/mlm_loss: 1.7571361064910889


Validation: |                                                                                                 …

val/accuracy: 0.8150601387023926
val/f1: 0.8149205446243286
val/taskclf_loss: 0.5075498819351196
val/loss: 0.8328185677528381
val/mlm_loss: 1.7460134029388428


Validation: |                                                                                                 …

val/accuracy: 0.8142347931861877
val/f1: 0.8139705061912537
val/taskclf_loss: 0.5080420970916748
val/loss: 0.8304997682571411
val/mlm_loss: 1.735802412033081


Validation: |                                                                                                 …

val/accuracy: 0.8190127015113831
val/f1: 0.8189710974693298
val/taskclf_loss: 0.5127444863319397
val/loss: 0.8365585207939148
val/mlm_loss: 1.7456692457199097


Validation: |                                                                                                 …

val/accuracy: 0.8148804903030396
val/f1: 0.8141208291053772
val/taskclf_loss: 0.5360828042030334
val/loss: 0.8514423370361328
val/mlm_loss: 1.7368171215057373


Validation: |                                                                                                 …

val/accuracy: 0.8180582523345947
val/f1: 0.8178825378417969
val/taskclf_loss: 0.5235021710395813
val/loss: 0.8388193845748901
val/mlm_loss: 1.724075198173523


Validation: |                                                                                                 …

val/accuracy: 0.8271761536598206
val/f1: 0.8273851275444031
val/taskclf_loss: 0.5352509617805481
val/loss: 0.8483098149299622
val/mlm_loss: 1.7272251844406128


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.826659619808197
val/f1: 0.8269862532615662
val/taskclf_loss: 0.5429847240447998
val/loss: 0.8530588150024414
val/mlm_loss: 1.7235946655273438


Best checkpoint path: checkpoints/lightning_logs/version_6/checkpoints/task-GTEPelt-epoch=04-val_loss=0.83.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_6/checkpoints/GTEPelt-epoch=05.ckpt


Filter:   0%|          | 0/392702 [00:00<?, ? examples/s]

Filter:   0%|          | 0/392702 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9815 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9815 [00:00<?, ? examples/s]

Source genre: government
Target genre: telephone
Number of target samples: 75013


Map:   0%|          | 0/75013 [00:00<?, ? examples/s]

Map:   0%|          | 0/75013 [00:00<?, ? examples/s]

Map:   0%|          | 0/8335 [00:00<?, ? examples/s]

Map:   0%|          | 0/8335 [00:00<?, ? examples/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.5861151814460754, 'source_test/accuracy': 0.8256767988204956, 'source_test/f1': 0.8251031041145325, 'source_test/f1_macro': 0.8203700184822083, 'source_test/f1_micro': 0.8256767988204956, 'target_test/loss': 0.7881933450698853, 'target_test/accuracy': 0.7441676259040833, 'target_test/f1': 0.7448318600654602, 'target_test/f1_macro': 0.7318492531776428, 'target_test/f1_micro': 0.7441676259040833}]
Best checkpoint path: checkpoints/lightning_logs/version_6/checkpoints/task-GTEPelt-epoch=04-val_loss=0.83.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_6/checkpoints/GTEPelt-epoch=05.ckpt


/home/guest/.cache/pypoetry/virtualenvs/third-experments-xuKQSur9-py3.8/lib/python3.8/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.5394529700279236, 'source_test/accuracy': 0.8045074343681335, 'source_test/f1': 0.8039987683296204, 'source_test/f1_macro': 0.7998712658882141, 'source_test/f1_micro': 0.8045074343681335, 'target_test/loss': 0.6952080726623535, 'target_test/accuracy': 0.7296947240829468, 'target_test/f1': 0.7283468842506409, 'target_test/f1_macro': 0.720272421836853, 'target_test/f1_micro': 0.7296947240829468}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 10


Test Results on saved epoch: [{'source_test/loss': 0.5401831865310669, 'source_test/accuracy': 0.8085396885871887, 'source_test/f1': 0.8081749677658081, 'source_test/f1_macro': 0.8029620051383972, 'source_test/f1_micro': 0.8085396885871887, 'target_test/loss': 0.7303146123886108, 'target_test/accuracy': 0.7273905277252197, 'target_test/f1': 0.7274670600891113, 'target_test/f1_macro': 0.7157220840454102, 'target_test/f1_micro': 0.7273905277252197}]
Batch size: 32


Source genre: government


Target genre: telephone
Number of target samples: 75013


Source genre: government
Target genre: telephone
Number of target samples: 75013


Source dataset length: 69615
Target dataset length: 24796


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


eee


Sanity Checking: |                                                                                            …

val/accuracy: 0.328125
val/f1: 0.464813232421875
val/taskclf_loss: 1.0996856689453125
val/loss: 1.3243076801300049
val/mlm_loss: 1.9549357891082764


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7779486179351807
val/f1: 0.7786813378334045
val/taskclf_loss: 0.5459587574005127
val/loss: 0.8676323294639587
val/mlm_loss: 1.7707337141036987


Validation: |                                                                                                 …

val/accuracy: 0.8075705170631409
val/f1: 0.8077964782714844
val/taskclf_loss: 0.4996713399887085
val/loss: 0.8293103575706482
val/mlm_loss: 1.754775047302246


Validation: |                                                                                                 …

val/accuracy: 0.8110570907592773
val/f1: 0.8110413551330566
val/taskclf_loss: 0.4910622239112854
val/loss: 0.8220059275627136
val/mlm_loss: 1.7511333227157593


Validation: |                                                                                                 …

val/accuracy: 0.8158349394798279
val/f1: 0.8157492280006409
val/taskclf_loss: 0.48774459958076477
val/loss: 0.8191289901733398
val/mlm_loss: 1.7494935989379883


Validation: |                                                                                                 …

val/accuracy: 0.8234537839889526
val/f1: 0.8236840963363647
val/taskclf_loss: 0.48351728916168213
val/loss: 0.8122215867042542
val/mlm_loss: 1.7350616455078125


Validation: |                                                                                                 …

val/accuracy: 0.8247450590133667
val/f1: 0.8245156407356262
val/taskclf_loss: 0.49373504519462585
val/loss: 0.8233011960983276
val/mlm_loss: 1.7485610246658325


Validation: |                                                                                                 …

val/accuracy: 0.8188049793243408
val/f1: 0.8182145953178406
val/taskclf_loss: 0.5006548166275024
val/loss: 0.8277772665023804
val/mlm_loss: 1.7461766004562378


Validation: |                                                                                                 …

val/accuracy: 0.8181593418121338
val/f1: 0.8177072405815125
val/taskclf_loss: 0.5001867413520813
val/loss: 0.8279479146003723
val/mlm_loss: 1.7481403350830078


Validation: |                                                                                                 …

val/accuracy: 0.8337843418121338
val/f1: 0.8336358666419983
val/taskclf_loss: 0.5071918964385986
val/loss: 0.8271725177764893
val/mlm_loss: 1.7255209684371948


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.8330880999565125
val/f1: 0.8329830765724182
val/taskclf_loss: 0.5148947834968567
val/loss: 0.83175128698349
val/mlm_loss: 1.7213287353515625


Best checkpoint path: checkpoints/lightning_logs/version_7/checkpoints/task-GTEPelt-epoch=04-val_loss=0.81.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_7/checkpoints/GTEPelt-epoch=05.ckpt


Source genre: government


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Target genre: telephone
Number of target samples: 75013


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.5556137561798096, 'source_test/accuracy': 0.8246687650680542, 'source_test/f1': 0.8233944773674011, 'source_test/f1_macro': 0.8191143870353699, 'source_test/f1_micro': 0.8246687650680542, 'target_test/loss': 0.7474626302719116, 'target_test/accuracy': 0.7481998801231384, 'target_test/f1': 0.7488167881965637, 'target_test/f1_macro': 0.7376972436904907, 'target_test/f1_micro': 0.7481998801231384}]
Best checkpoint path: checkpoints/lightning_logs/version_7/checkpoints/task-GTEPelt-epoch=04-val_loss=0.81.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_7/checkpoints/GTEPelt-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.5199747085571289, 'source_test/accuracy': 0.8075316548347473, 'source_test/f1': 0.8069212436676025, 'source_test/f1_macro': 0.8021917939186096, 'source_test/f1_micro': 0.8075316548347473, 'target_test/loss': 0.6829482316970825, 'target_test/accuracy': 0.7334389090538025, 'target_test/f1': 0.7334195375442505, 'target_test/f1_macro': 0.7236917614936829, 'target_test/f1_micro': 0.7334389090538025}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Seed set to 100


Test Results on saved epoch: [{'source_test/loss': 0.5423531532287598, 'source_test/accuracy': 0.8055155277252197, 'source_test/f1': 0.8048443794250488, 'source_test/f1_macro': 0.7998210787773132, 'source_test/f1_micro': 0.8055155277252197, 'target_test/loss': 0.6947317719459534, 'target_test/accuracy': 0.7417914867401123, 'target_test/f1': 0.7404650449752808, 'target_test/f1_macro': 0.7315645217895508, 'target_test/f1_micro': 0.7417914867401123}]
Batch size: 32


Source genre: government
Target genre: telephone
Number of target samples: 75013


Source genre: government
Target genre: telephone
Number of target samples: 75013


Source dataset length: 69615
Target dataset length: 24796


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | BertAdapterModel   | 122 M 
1 | criterion     | CrossEntropyLoss   | 0     
2 | mlm_criterion | CrossEntropyLoss   | 0     
3 | accuracy      | MulticlassAccuracy | 0     
4 | f1            | MulticlassF1Score  | 0     
5 | f1_macro      | MulticlassF1Score  | 0     
6 | f1_micro      | MulticlassF1Score  | 0     
7 | softmax       | Softmax            | 0     
-----------------------------------------------------
12.9 M    Trainable params
109 M     Non-trainable params
122 M     Total params
489.615   Total estimated model params size (MB)


eee


Sanity Checking: |                                                                                            …

val/accuracy: 0.453125
val/f1: 0.4406209886074066
val/taskclf_loss: 1.0946807861328125
val/loss: 1.3268319368362427
val/mlm_loss: 1.9785983562469482


Training: |                                                                                                   …

Validation: |                                                                                                 …

val/accuracy: 0.7934950590133667
val/f1: 0.7929096817970276
val/taskclf_loss: 0.5249777436256409
val/loss: 0.853722870349884
val/mlm_loss: 1.7766774892807007


Validation: |                                                                                                 …

val/accuracy: 0.8129940629005432
val/f1: 0.8125742673873901
val/taskclf_loss: 0.4858967363834381
val/loss: 0.8214196562767029
val/mlm_loss: 1.7634034156799316


Validation: |                                                                                                 …

val/accuracy: 0.8180301785469055
val/f1: 0.8176507353782654
val/taskclf_loss: 0.47595804929733276
val/loss: 0.813459575176239
val/mlm_loss: 1.7609983682632446


Validation: |                                                                                                 …

val/accuracy: 0.8222410082817078
val/f1: 0.8216063380241394
val/taskclf_loss: 0.47033190727233887
val/loss: 0.8094955086708069
val/mlm_loss: 1.76170015335083


Validation: |                                                                                                 …

val/accuracy: 0.8216459155082703
val/f1: 0.8208410739898682
val/taskclf_loss: 0.4841495156288147
val/loss: 0.8193536996841431
val/mlm_loss: 1.7604424953460693


Validation: |                                                                                                 …

val/accuracy: 0.8289053440093994
val/f1: 0.828691303730011
val/taskclf_loss: 0.4843117594718933
val/loss: 0.8134473562240601
val/mlm_loss: 1.73749840259552


Validation: |                                                                                                 …

val/accuracy: 0.8221624493598938
val/f1: 0.8216067552566528
val/taskclf_loss: 0.4966898560523987
val/loss: 0.8211978077888489
val/mlm_loss: 1.7322566509246826


Validation: |                                                                                                 …

val/accuracy: 0.8338629007339478
val/f1: 0.8337292671203613
val/taskclf_loss: 0.4848065674304962
val/loss: 0.8156365752220154
val/mlm_loss: 1.7444449663162231


Validation: |                                                                                                 …

val/accuracy: 0.8346377015113831
val/f1: 0.8346251845359802
val/taskclf_loss: 0.48854267597198486
val/loss: 0.8177613615989685
val/mlm_loss: 1.7420458793640137


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=10` reached.


val/accuracy: 0.8348959684371948
val/f1: 0.8349175453186035
val/taskclf_loss: 0.4862629473209381
val/loss: 0.814564049243927
val/mlm_loss: 1.7362722158432007


Best checkpoint path: checkpoints/lightning_logs/version_8/checkpoints/task-GTEPelt-epoch=03-val_loss=0.81.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_8/checkpoints/GTEPelt-epoch=05.ckpt


Source genre: government
Target genre: telephone
Number of target samples: 75013


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results Last Epoch: [{'source_test/loss': 0.4849169850349426, 'source_test/accuracy': 0.8423098921775818, 'source_test/f1': 0.8411466479301453, 'source_test/f1_macro': 0.8380321264266968, 'source_test/f1_micro': 0.8423098921775818, 'target_test/loss': 0.6773848533630371, 'target_test/accuracy': 0.760296642780304, 'target_test/f1': 0.7595835328102112, 'target_test/f1_macro': 0.7502824068069458, 'target_test/f1_micro': 0.760296642780304}]
Best checkpoint path: checkpoints/lightning_logs/version_8/checkpoints/task-GTEPelt-epoch=03-val_loss=0.81.ckpt
Saved epoch checkpoint path: checkpoints/lightning_logs/version_8/checkpoints/GTEPelt-epoch=05.ckpt


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on Best Model: [{'source_test/loss': 0.4856506288051605, 'source_test/accuracy': 0.8140840530395508, 'source_test/f1': 0.8132938742637634, 'source_test/f1_macro': 0.8095536828041077, 'source_test/f1_micro': 0.8140840530395508, 'target_test/loss': 0.6590970754623413, 'target_test/accuracy': 0.7320708632469177, 'target_test/f1': 0.731035590171814, 'target_test/f1_macro': 0.7217228412628174, 'target_test/f1_micro': 0.7320708632469177}]


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                                                                    …

Test Results on saved epoch: [{'source_test/loss': 0.5053554773330688, 'source_test/accuracy': 0.8261808156967163, 'source_test/f1': 0.8251906037330627, 'source_test/f1_macro': 0.8214964866638184, 'source_test/f1_micro': 0.8261808156967163, 'target_test/loss': 0.6793693900108337, 'target_test/accuracy': 0.752592146396637, 'target_test/f1': 0.7522187232971191, 'target_test/f1_macro': 0.7425521612167358, 'target_test/f1_micro': 0.752592146396637}]


In [6]:
results.items()

dict_items([('last_epoch', {'source_test/loss': [0.5861151814460754, 0.5556137561798096, 0.4849169850349426], 'source_test/accuracy': [0.8256767988204956, 0.8246687650680542, 0.8423098921775818], 'source_test/f1': [0.8251031041145325, 0.8233944773674011, 0.8411466479301453], 'source_test/f1_macro': [0.8203700184822083, 0.8191143870353699, 0.8380321264266968], 'source_test/f1_micro': [0.8256767988204956, 0.8246687650680542, 0.8423098921775818], 'target_test/loss': [0.7881933450698853, 0.7474626302719116, 0.6773848533630371], 'target_test/accuracy': [0.7441676259040833, 0.7481998801231384, 0.760296642780304], 'target_test/f1': [0.7448318600654602, 0.7488167881965637, 0.7595835328102112], 'target_test/f1_macro': [0.7318492531776428, 0.7376972436904907, 0.7502824068069458], 'target_test/f1_micro': [0.7441676259040833, 0.7481998801231384, 0.760296642780304]}), ('best_model', {'source_test/loss': [0.5394529700279236, 0.5199747085571289, 0.4856506288051605], 'source_test/accuracy': [0.8045074

In [7]:
# Calculate mean and standard deviation for each scenario
mean_results = {scenario: {key: np.mean(values) for key, values in metrics.items()} for scenario, metrics in results.items()}
std_results = {scenario: {key: np.std(values) for key, values in metrics.items()} for scenario, metrics in results.items()}

# Log mean and standard deviation results to wandb
# wandb.init(project=project_name, name=f'{domain}_mean_results')
# for scenario in mean_results:
#     for key, value in mean_results[scenario].items():
#         wandb.log({f"{scenario}/{key}": value})
#         wandb.log({f"{scenario}/{key}_std": std_results[scenario][key]})
# wandb.finish()

print("Mean Results:", mean_results)
print("Standard Deviation Results:", std_results)

# # Save the best model's adapter
# if model:
#     adapter_save_path = f"../../saved/adapter_after_run/{hparams['task_adapter_name']}"
#     model.save_adapter(adapter_save_path, hparams['task_adapter_name'])
#     print(f"Adapter saved to {adapter_save_path}")
# else:
#     print("No best model to save.")

Mean Results: {'last_epoch': {'source_test/loss': 0.5422153075536092, 'source_test/accuracy': 0.8308851520220438, 'source_test/f1': 0.8298814098040262, 'source_test/f1_macro': 0.8258388439814249, 'source_test/f1_micro': 0.8308851520220438, 'target_test/loss': 0.7376802762349447, 'target_test/accuracy': 0.7508880496025085, 'target_test/f1': 0.751077393690745, 'target_test/f1_macro': 0.7399429678916931, 'target_test/f1_micro': 0.7508880496025085}, 'best_model': {'source_test/loss': 0.5150261024634043, 'source_test/accuracy': 0.8087077140808105, 'source_test/f1': 0.8080712954203287, 'source_test/f1_macro': 0.8038722475369772, 'source_test/f1_micro': 0.8087077140808105, 'target_test/loss': 0.6790844599405924, 'target_test/accuracy': 0.731734832127889, 'target_test/f1': 0.7309340039889017, 'target_test/f1_macro': 0.721895674864451, 'target_test/f1_micro': 0.731734832127889}, 'epoch_saved': {'source_test/loss': 0.5292972723642985, 'source_test/accuracy': 0.8134120106697083, 'source_test/f1':

In [8]:
print('dones')

dones


In [9]:
best_val_loss

inf