Inspired by: https://huggingface.co/blog/train-sentence-transformers

In [None]:
import os
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, SentenceTransformerTrainingArguments
from sentence_transformers.losses import CoSENTLoss, MultipleNegativesRankingLoss, SoftmaxLoss
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator, TripletEvaluator, LabelAccuracyEvaluator, SequentialEvaluator
from transformers import EarlyStoppingCallback
import wandb

# Set the NCCL switches in one go; both are plain string flags read from the environment.
os.environ.update({"NCCL_P2P_DISABLE": "1", "NCCL_IB_DISABLE": "1"})

# Log in to Weights & Biases with the key stored in the WANDB_API_TOKEN variable.
# os.getenv returns None when the variable is unset, and that None is passed through as-is.
wandb_api_key = os.getenv('WANDB_API_TOKEN')
wandb.login(key=wandb_api_key)

# Owner and name of the base checkpoint; the name is reused later for output paths.
base_model_username = 'allenai'
base_model_name = 'longformer-base-4096'

# 1. Load a model to finetune
base_model_id = f"{base_model_username}/{base_model_name}"
model = SentenceTransformer(base_model_id)

# 2. Load datasets for training and evaluation
# Each training source is the tuple of positional arguments handed to `load_dataset`:
# the Hub repository, optionally followed by a subset name.
_train_sources = {
    "all-nli-pair": ("sentence-transformers/all-nli", "pair"),
    "all-nli-pair-class": ("sentence-transformers/all-nli", "pair-class"),
    "all-nli-pair-score": ("sentence-transformers/all-nli", "pair-score"),
    "all-nli-triplet": ("sentence-transformers/all-nli", "triplet"),
    "stsb": ("sentence-transformers/stsb",),
    "quora": ("sentence-transformers/quora-duplicates", "pair"),
    "natural-questions": ("sentence-transformers/natural-questions",),
}

# Every training source contributes its first 10,000 rows.
train_dataset = {
    dataset_name: load_dataset(*source, split="train[:10000]")
    for dataset_name, source in _train_sources.items()
}

# Evaluation sources carry their own split expression. Quora and Natural Questions
# are sliced from rows 10,000-10,999 of their train split, i.e. outside the rows
# selected for training above.
_eval_sources = {
    "all-nli-triplet": (("sentence-transformers/all-nli", "triplet"), "dev"),
    "stsb": (("sentence-transformers/stsb",), "validation"),
    "quora": (("sentence-transformers/quora-duplicates", "pair"), "train[10000:11000]"),
    "natural-questions": (("sentence-transformers/natural-questions",), "train[10000:11000]"),
}

eval_dataset = {
    dataset_name: load_dataset(*source, split=split_expr)
    for dataset_name, (source, split_expr) in _eval_sources.items()
}

# Define loss functions
# SoftmaxLoss is configured with the embedding width of the model and the number of
# distinct values found in the "label" column of the pair-class training data.
embedding_dim = model.get_sentence_embedding_dimension()
_pair_class_labels = train_dataset["all-nli-pair-class"]["label"]
num_labels = len(set(_pair_class_labels))

# One loss instance per loss type; each instance is shared by all datasets that use it.
mnrl_loss = MultipleNegativesRankingLoss(model)
softmax_loss = SoftmaxLoss(
    model,
    sentence_embedding_dimension=embedding_dim,
    num_labels=num_labels,
)
cosent_loss = CoSENTLoss(model)

# Dataset name -> loss, listed in the same order as the training datasets.
_dataset_loss_pairs = [
    ("all-nli-pair", mnrl_loss),
    ("all-nli-pair-class", softmax_loss),
    ("all-nli-pair-score", cosent_loss),
    ("all-nli-triplet", mnrl_loss),
    ("stsb", cosent_loss),
    ("quora", mnrl_loss),
    ("natural-questions", mnrl_loss),
]
losses = dict(_dataset_loss_pairs)

# Define evaluators for each dataset
# STS-B validation pairs with their gold similarity scores.
stsb_evaluator = EmbeddingSimilarityEvaluator(
    sentences1=eval_dataset["stsb"]["sentence1"],
    sentences2=eval_dataset["stsb"]["sentence2"],
    scores=eval_dataset["stsb"]["score"],
    name="sts-dev",
)

# AllNLI dev triplets: the anchor should be closer to the positive than to the negative.
all_nli_triplet_evaluator = TripletEvaluator(
    anchors=eval_dataset["all-nli-triplet"]["anchor"],
    positives=eval_dataset["all-nli-triplet"]["positive"],
    negatives=eval_dataset["all-nli-triplet"]["negative"],
    name="triplet-dev",
)

# NLI similarity check on held-out data.
# The previous version scored the model on the *training* rows of the pair-class
# subset and fed the raw class ids in as similarity scores. Both are wrong for an
# EmbeddingSimilarityEvaluator: it leaks training data into model selection and
# class ids are not a similarity scale. The pair-score subset provides real
# similarity scores, and its dev split is not used for training. The slice keeps
# the evaluation cost in line with the other 1,000-row held-out sets.
# The variable name and evaluator name are kept so existing metric keys stay stable.
_nli_score_dev = load_dataset("sentence-transformers/all-nli", "pair-score", split="dev[:1000]")
label_accuracy_evaluator = EmbeddingSimilarityEvaluator(
    sentences1=_nli_score_dev["sentence1"],
    sentences2=_nli_score_dev["sentence2"],
    scores=_nli_score_dev["score"],
    name="label-accuracy-dev",
)


# Combine evaluators into a SequentialEvaluator
# The main score is the score of the last evaluator in the list (0 if there are none).
evaluator = SequentialEvaluator(
    [stsb_evaluator, all_nli_triplet_evaluator, label_accuracy_evaluator],
    main_score_function=lambda scores: scores[-1] if scores else 0,
)

# Define custom aggregate metric
def compute_aggregate_metric(metrics):
    """
    Combine metrics from multiple evaluators into a single score for optimization.

    Args:
        metrics: Mapping of ``eval_``-prefixed metric names to values, as returned
            by the trainer's evaluation.

    Returns:
        The weighted average ``0.4 * sts + 0.4 * triplet + 0.2 * label`` as a float.
        A component that is missing, non-numeric, NaN or infinite counts as 0, so
        one undefined correlation cannot turn the whole score into NaN.
    """
    import math

    def first_finite(*keys):
        # Return the first candidate key that holds a finite number, else 0.0.
        for key in keys:
            value = metrics.get(key)
            if value is None:
                continue
            try:
                number = float(value)
            except (TypeError, ValueError):
                continue
            if math.isfinite(number):
                return number
        return 0.0

    stsb_score = first_finite("eval_sts-dev_spearman_cosine")
    # TripletEvaluator reports accuracies; there is no "mean_rank" metric.
    triplet_score = first_finite("eval_triplet-dev_cosine_accuracy")
    # Prefer a real accuracy if one is reported; the evaluator registered under
    # "label-accuracy-dev" is an EmbeddingSimilarityEvaluator, which reports
    # correlations instead.
    label_accuracy_score = first_finite(
        "eval_label-accuracy-dev_accuracy",
        "eval_label-accuracy-dev_spearman_cosine",
    )

    # Weighted average of metrics
    aggregate_score = (
        0.4 * stsb_score +
        0.4 * triplet_score +
        0.2 * label_accuracy_score
    )
    return aggregate_score

# Define W&B sweep configuration
# Search space: a continuous learning-rate range and three discrete values for the
# parameter named "warmup_steps".
_sweep_parameters = dict(
    learning_rate=dict(min=2e-5, max=5e-4),
    warmup_steps=dict(values=[0.1, 0.2, 0.3]),
)

# Bayesian search that maximizes the metric named "aggregate_score".
sweep_config = dict(
    method="bayes",
    metric=dict(name="aggregate_score", goal="maximize"),
    parameters=_sweep_parameters,
)

# Initialize W&B sweep
sweep_id = wandb.sweep(sweep_config, project="sentence-transformers-sweep")

# Define training function
def train_model(config=None):
    """
    Run one W&B sweep trial: fine-tune `model` on all training datasets.

    Args:
        config: Optional configuration passed to ``wandb.init``. When called by the
            sweep agent, the sampled hyperparameters are read from ``wandb.config``
            (``learning_rate`` and ``warmup_steps``; the latter holds a fraction of
            the total training steps).

    Side effects:
        Writes checkpoints under ``./<base_model_name>-fine-tuned``, saves the final
        model to ``<base_model_name>-best-model`` and uploads that folder to W&B.
    """
    with wandb.init(config=config):
        config = wandb.config

        # NOTE(review): `model` and the loss modules are module-level objects, so
        # each sweep trial continues from the weights left by the previous trial
        # instead of starting from the base checkpoint.

        # Define training arguments
        training_args = SentenceTransformerTrainingArguments(
            output_dir=f"./{base_model_name}-fine-tuned",
            overwrite_output_dir=True,
            # `evaluation_strategy` is the deprecated spelling of this argument.
            eval_strategy="steps",
            eval_steps=500,
            save_total_limit=3,
            load_best_model_at_end=True,
            # The Trainer looks this up as "eval_aggregate_score".
            metric_for_best_model="aggregate_score",
            greater_is_better=True,
            per_device_train_batch_size=8,
            per_device_eval_batch_size=8,
            num_train_epochs=10,
            # The swept value is a fraction (0.1-0.3). Multiplying it by
            # len(train_dataset) only counted the 7 dataset entries and gave about
            # 2 warmup steps, so let the Trainer derive the step count itself.
            warmup_ratio=config.warmup_steps,
            learning_rate=config.learning_rate,
            logging_dir="./logs",
            logging_steps=200,
            save_steps=500,
        )

        early_stopping = EarlyStoppingCallback(early_stopping_patience=3, early_stopping_threshold=0.01)

        class _AggregateScoreEvaluator(SequentialEvaluator):
            """SequentialEvaluator that also reports the weighted aggregate score.

            The aggregate has to be part of the evaluator output: metrics added
            after ``trainer.evaluate`` returns are seen neither by the
            early-stopping callback nor by the best-checkpoint bookkeeping, which
            is what raised ``KeyError: 'eval_aggregate_score'``.
            """

            def __call__(self, *args, **kwargs):
                results = super().__call__(*args, **kwargs)
                if isinstance(results, dict):
                    # compute_aggregate_metric expects the "eval_" prefix that the
                    # trainer adds to evaluator metrics later on.
                    prefixed = {
                        key if key.startswith("eval_") else f"eval_{key}": value
                        for key, value in results.items()
                    }
                    aggregate_score = compute_aggregate_metric(prefixed)
                    results["aggregate_score"] = aggregate_score
                    # Log under the bare name the sweep optimizes.
                    wandb.log({"aggregate_score": aggregate_score})
                return results

        # Define SequentialEvaluator
        evaluator = _AggregateScoreEvaluator(
            [stsb_evaluator, all_nli_triplet_evaluator, label_accuracy_evaluator],
            main_score_function=lambda scores: scores[-1] if scores else 0,
        )

        # Train model
        trainer = SentenceTransformerTrainer(
            model=model,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            loss=losses,
            args=training_args,
            evaluator=evaluator,
            callbacks=[early_stopping],
        )

        trainer.train()

        # Save the model
        model.save_pretrained(f"{base_model_name}-best-model")
        wandb.save(f"{base_model_name}-best-model/*")



# Start W&B agent for the sweep; each trial it launches calls train_model.
wandb.agent(sweep_id, function=train_model)

# Upload the module-level `model` to the Hugging Face Hub.
# Note that this is the model object in whatever state the sweep trials left it.
hub_repo_id = f"{base_model_name}-sentence-transformers-best"
model.push_to_hub(hub_repo_id, exist_ok=True)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mleo1212[0m ([33mhslu_nlp[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/leonkrug/.netrc
No sentence-transformers model found with name allenai/longformer-base-4096. Creating a new one with mean pooling.


Create sweep with ID: mbg5ckdp
Sweep URL: https://wandb.ai/hslu_nlp/sentence-transformers-sweep/sweeps/mbg5ckdp


[34m[1mwandb[0m: Agent Starting Run: 8yu15rc5 with config:
[34m[1mwandb[0m: 	learning_rate: 0.00045788302667372136
[34m[1mwandb[0m: 	warmup_steps: 0.3


Currently using DataParallel (DP) for multi-gpu training, while DistributedDataParallel (DDP) is recommended for faster training. See https://sbert.net/docs/sentence_transformer/training/distributed.html for more information.


  0%|          | 0/41100 [00:00<?, ?it/s]

Input ids are automatically padded to be a multiple of `config.attention_window`: 512
Column 'hypothesis' is at index 1, whereas a column with this name is usually expected at index 0. Note that the column order can be important for some losses, e.g. MultipleNegativesRankingLoss will always consider the first column as the anchor and the second as the positive, regardless of the dataset column names. Consider renaming the columns to match the expected order, e.g.:
dataset = dataset.select_columns(['hypothesis', 'entailment', 'contradiction'])


{'loss': 3.3109, 'grad_norm': 2.5075600147247314, 'learning_rate': 0.00045567705949085607, 'epoch': 0.05}
{'loss': 3.5823, 'grad_norm': 0.04463895410299301, 'learning_rate': 0.0004534488098111942, 'epoch': 0.1}


  0%|          | 0/412 [00:00<?, ?it/s]

  eval_pearson_cosine, _ = pearsonr(labels, cosine_scores)
  eval_spearman_cosine, _ = spearmanr(labels, cosine_scores)
  eval_pearson_cosine, _ = pearsonr(labels, cosine_scores)
  eval_spearman_cosine, _ = spearmanr(labels, cosine_scores)
early stopping required metric_for_best_model, but did not find eval_aggregate_score so early stopping is disabled


{'eval_all-nli-triplet_loss': 3.4648938179016113, 'eval_sts-dev_pearson_cosine': nan, 'eval_sts-dev_spearman_cosine': nan, 'eval_sts-dev_pearson_manhattan': 0.1953366031192939, 'eval_sts-dev_spearman_manhattan': 0.18628029922412706, 'eval_sts-dev_pearson_euclidean': 0.12038330059026879, 'eval_sts-dev_spearman_euclidean': 0.11701423250889276, 'eval_sts-dev_pearson_dot': -0.020898059060793592, 'eval_sts-dev_spearman_dot': -0.019267171663208498, 'eval_sts-dev_pearson_max': nan, 'eval_sts-dev_spearman_max': nan, 'eval_triplet-dev_cosine_accuracy': 0.5089611178614823, 'eval_triplet-dev_dot_accuracy': 0.24939246658566222, 'eval_triplet-dev_manhattan_accuracy': 0.511543134872418, 'eval_triplet-dev_euclidean_accuracy': 0.5103280680437424, 'eval_triplet-dev_max_accuracy': 0.511543134872418, 'eval_label-accuracy-dev_pearson_cosine': nan, 'eval_label-accuracy-dev_spearman_cosine': nan, 'eval_label-accuracy-dev_pearson_manhattan': 0.049476403113581605, 'eval_label-accuracy-dev_spearman_manhattan':

  0%|          | 0/94 [00:00<?, ?it/s]

early stopping required metric_for_best_model, but did not find eval_aggregate_score so early stopping is disabled


{'eval_stsb_loss': 4.755264759063721, 'eval_stsb_runtime': 42.8844, 'eval_stsb_samples_per_second': 34.978, 'eval_stsb_steps_per_second': 2.192, 'epoch': 0.12}


  0%|          | 0/63 [00:00<?, ?it/s]

early stopping required metric_for_best_model, but did not find eval_aggregate_score so early stopping is disabled


{'eval_quora_loss': 2.7670435905456543, 'eval_quora_runtime': 28.1058, 'eval_quora_samples_per_second': 35.58, 'eval_quora_steps_per_second': 2.242, 'epoch': 0.12}


  0%|          | 0/63 [00:00<?, ?it/s]

early stopping required metric_for_best_model, but did not find eval_aggregate_score so early stopping is disabled


{'eval_natural-questions_loss': 2.767043113708496, 'eval_natural-questions_runtime': 28.6045, 'eval_natural-questions_samples_per_second': 34.96, 'eval_natural-questions_steps_per_second': 2.202, 'epoch': 0.12}


Computing widget examples:   0%|          | 0/3 [00:00<?, ?example/s]

Traceback (most recent call last):
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 3022, in _save_checkpoint
    metric_value = metrics[metric_to_check]
                   ~~~~~~~^^^^^^^^^^^^^^^^^
KeyError: 'eval_aggregate_score'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/tmp/ipykernel_8990/1301178606.py", line 179, in train_model
    trainer.train()
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2052, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2467, in _inner_training_loop
    self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", li

VBox(children=(Label(value='0.036 MB of 0.036 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
aggregate_score,▁▁▁
eval/all-nli-triplet_loss,▁
eval/all-nli-triplet_runtime,▁
eval/all-nli-triplet_samples_per_second,▁
eval/all-nli-triplet_steps_per_second,▁
eval/label-accuracy-dev_pearson_dot,▁
eval/label-accuracy-dev_pearson_euclidean,▁
eval/label-accuracy-dev_pearson_manhattan,▁
eval/label-accuracy-dev_spearman_dot,▁
eval/label-accuracy-dev_spearman_euclidean,▁

0,1
aggregate_score,
eval/all-nli-triplet_loss,3.46489
eval/all-nli-triplet_runtime,606.3367
eval/all-nli-triplet_samples_per_second,10.859
eval/all-nli-triplet_steps_per_second,0.679
eval/label-accuracy-dev_pearson_cosine,
eval/label-accuracy-dev_pearson_dot,-0.01166
eval/label-accuracy-dev_pearson_euclidean,0.03907
eval/label-accuracy-dev_pearson_manhattan,0.04948
eval/label-accuracy-dev_pearson_max,


Run 8yu15rc5 errored:
Traceback (most recent call last):
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 3022, in _save_checkpoint
    metric_value = metrics[metric_to_check]
                   ~~~~~~~^^^^^^^^^^^^^^^^^
KeyError: 'eval_aggregate_score'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_8990/1301178606.py", line 179, in train_model
    trainer.train()
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2052, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2467, in _inner_training_loop
    self._maybe_log_save_evaluate(

Currently using DataParallel (DP) for multi-gpu training, while DistributedDataParallel (DDP) is recommended for faster training. See https://sbert.net/docs/sentence_transformer/training/distributed.html for more information.


  0%|          | 0/41100 [00:00<?, ?it/s]

Column 'hypothesis' is at index 1, whereas a column with this name is usually expected at index 0. Note that the column order can be important for some losses, e.g. MultipleNegativesRankingLoss will always consider the first column as the anchor and the second as the positive, regardless of the dataset column names. Consider renaming the columns to match the expected order, e.g.:
dataset = dataset.select_columns(['hypothesis', 'entailment', 'contradiction'])
Traceback (most recent call last):
  File "/tmp/ipykernel_8990/1301178606.py", line 179, in train_model
    trainer.train()
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2052, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2388, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^

Run tqw17xxr errored:
Traceback (most recent call last):
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_8990/1301178606.py", line 179, in train_model
    trainer.train()
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2052, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2388, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 3518, in training_step
    self.accelerator.backward(loss, **kwargs)
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/accelerate/accelerator.py", line 2246,

Currently using DataParallel (DP) for multi-gpu training, while DistributedDataParallel (DDP) is recommended for faster training. See https://sbert.net/docs/sentence_transformer/training/distributed.html for more information.


  0%|          | 0/41100 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "/tmp/ipykernel_8990/1301178606.py", line 179, in train_model
    trainer.train()
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2052, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2388, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 3485, in training_step
    loss = self.compute_loss(model, inputs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/sentence_transformers/trainer.py", line 344, in compute_loss
    loss = loss_fn(features, labels)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/ana

Run 7245cnsz errored:
Traceback (most recent call last):
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_8990/1301178606.py", line 179, in train_model
    trainer.train()
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2052, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2388, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 3485, in training_step
    loss = self.compute_loss(model, inputs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packa

Currently using DataParallel (DP) for multi-gpu training, while DistributedDataParallel (DDP) is recommended for faster training. See https://sbert.net/docs/sentence_transformer/training/distributed.html for more information.


  0%|          | 0/41100 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "/tmp/ipykernel_8990/1301178606.py", line 179, in train_model
    trainer.train()
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2052, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2388, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 3485, in training_step
    loss = self.compute_loss(model, inputs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/sentence_transformers/trainer.py", line 344, in compute_loss
    loss = loss_fn(features, labels)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/ana

Run saiil1bc errored:
Traceback (most recent call last):
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_8990/1301178606.py", line 179, in train_model
    trainer.train()
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2052, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2388, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 3485, in training_step
    loss = self.compute_loss(model, inputs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packa

Currently using DataParallel (DP) for multi-gpu training, while DistributedDataParallel (DDP) is recommended for faster training. See https://sbert.net/docs/sentence_transformer/training/distributed.html for more information.


  0%|          | 0/41100 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "/tmp/ipykernel_8990/1301178606.py", line 179, in train_model
    trainer.train()
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2052, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2388, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 3485, in training_step
    loss = self.compute_loss(model, inputs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/sentence_transformers/trainer.py", line 344, in compute_loss
    loss = loss_fn(features, labels)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/ana

Run eo60u1f0 errored:
Traceback (most recent call last):
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_8990/1301178606.py", line 179, in train_model
    trainer.train()
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2052, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2388, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 3485, in training_step
    loss = self.compute_loss(model, inputs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packa

Currently using DataParallel (DP) for multi-gpu training, while DistributedDataParallel (DDP) is recommended for faster training. See https://sbert.net/docs/sentence_transformer/training/distributed.html for more information.


  0%|          | 0/41100 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "/tmp/ipykernel_8990/1301178606.py", line 179, in train_model
    trainer.train()
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2052, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2388, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 3485, in training_step
    loss = self.compute_loss(model, inputs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/sentence_transformers/trainer.py", line 344, in compute_loss
    loss = loss_fn(features, labels)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/ana

Run 0d6ir904 errored:
Traceback (most recent call last):
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_8990/1301178606.py", line 179, in train_model
    trainer.train()
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2052, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 2388, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packages/transformers/trainer.py", line 3485, in training_step
    loss = self.compute_loss(model, inputs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leonkrug/anaconda3/envs/aicomp/lib/python3.11/site-packa

model.safetensors:   0%|          | 0.00/595M [00:00<?, ?B/s]

'https://huggingface.co/Leo1212/longformer-base-4096-sentence-transformers-best/commit/dce6d431c10e096140bece902bd3ee75d54e20a1'

: 