In [1]:
import datasets
from sentence_transformers import losses
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, SentenceTransformerTrainingArguments
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator, SimilarityFunction
from sentence_transformers.training_args import BatchSamplers
from datasets import load_dataset


In [2]:

# 1. Load the AllNLI dataset: https://huggingface.co/datasets/sentence-transformers/all-nli, only 10k train and 1k dev
train_dataset = datasets.load_from_disk("D:\\SBERT-Training\\datasets\\All-NLI\\train")

eval_dataset = datasets.load_from_disk("D:\\SBERT-Training\\datasets\\All-NLI\\eval")



In [4]:

# 2. Create an evaluator to perform useful HPO
stsb_eval_dataset = datasets.load_from_disk("D:\\SBERT-Training\\datasets\\stsb\\eval")

dev_evaluator = EmbeddingSimilarityEvaluator(
    sentences1=stsb_eval_dataset["sentence1"],
    sentences2=stsb_eval_dataset["sentence2"],
    scores=stsb_eval_dataset["score"],
    main_similarity=SimilarityFunction.COSINE,
    name="sts-dev",
)


In [27]:

# 3. Define the Hyperparameter Search Space
def hpo_search_space(trial):
    return {
        "num_train_epochs": trial.suggest_int("num_train_epochs", 0.01, 0.02),
        "per_device_train_batch_size": trial.suggest_int("per_device_train_batch_size", 8, 16),
        "warmup_ratio": trial.suggest_float("warmup_ratio", 0, 0.1),
        "learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True),
    }


In [28]:

# 4. Define the Model Initialization
def hpo_model_init(trial):
    return SentenceTransformer("D:\\distilbert-base-uncased")


In [29]:

# 5. Define the Loss Initialization
def hpo_loss_init(model):
    return losses.MultipleNegativesRankingLoss(model)


In [30]:

# 6. Define the Objective Function
def hpo_compute_objective(metrics):
    """
    Valid keys are: 'eval_loss', 'eval_sts-dev_pearson_cosine', 'eval_sts-dev_spearman_cosine',
    'eval_sts-dev_pearson_manhattan', 'eval_sts-dev_spearman_manhattan', 'eval_sts-dev_pearson_euclidean',
    'eval_sts-dev_spearman_euclidean', 'eval_sts-dev_pearson_dot', 'eval_sts-dev_spearman_dot',
    'eval_sts-dev_pearson_max', 'eval_sts-dev_spearman_max', 'eval_runtime', 'eval_samples_per_second',
    'eval_steps_per_second', 'epoch'

    due to the evaluator that we're using.
    """
    return metrics["eval_sts-dev_spearman_cosine"]


In [31]:

# 7. Define the training arguments
args = SentenceTransformerTrainingArguments(
    # Required parameter:
    output_dir="checkpoints",
    # Optional training parameters:
    # max_steps=10000, # We might want to limit the number of steps for HPO
    fp16=True,  # Set to False if you get an error that your GPU can't run on FP16
    bf16=False,  # Set to True if you have a GPU that supports BF16
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # MultipleNegativesRankingLoss benefits from no duplicate samples in a batch
    # Optional tracking/debugging parameters:
    eval_strategy="no", # We don't need to evaluate/save during HPO
    save_strategy="no",
    logging_steps=10,
    run_name="hpo",  # Will be used in W&B if `wandb` is installed
)


In [32]:
# 8. Define a callback to save the model with the trial ID
def save_model_callback(trial, trial_id):
    output_dir = os.path.join(args.output_dir, f"trial-{trial_id}")
    os.makedirs(output_dir, exist_ok=True)
    trial.set_user_attr("output_dir", output_dir)


In [33]:

# 9. Create the trainer with model_init rather than model
trainer = SentenceTransformerTrainer(
    model=None,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    evaluator=dev_evaluator,
    model_init=hpo_model_init,
    loss=hpo_loss_init,
)


No sentence-transformers model found with name D:\distilbert-base-uncased. Creating a new one with mean pooling.
No sentence-transformers model found with name D:\distilbert-base-uncased. Creating a new one with mean pooling.


In [34]:
import optuna
import os

In [35]:
# 10. Perform the HPO with a custom callback
def objective(trial):
    trial_id = trial.number
    save_model_callback(trial, trial_id)
    
    # Set trial-specific output directory
    trial_output_dir = trial.user_attrs["output_dir"]
    trainer.args.output_dir = trial_output_dir
    
    # Train the model
    trainer.train()
    
    # Evaluate the model
    metrics = trainer.evaluate()
    
    # Save the model
    trainer.save_model(trial_output_dir)
    
    # Compute the objective
    objective = hpo_compute_objective(metrics)
    return objective

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

best_trial = study.best_trial
print(f"Best trial: {best_trial.number}")
print(f"  Value: {best_trial.value}")
print("  Params: ")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

print(f"Model saved in: {best_trial.user_attrs['output_dir']}")

[I 2024-07-10 22:41:53,122] A new study created in memory with name: no-name-c4b5d631-56e3-44c5-a575-aa843cb4a7ee
No sentence-transformers model found with name D:\distilbert-base-uncased. Creating a new one with mean pooling.


  0%|          | 0/3750 [00:00<?, ?it/s]

{'loss': 1.1799, 'grad_norm': 27.037506103515625, 'learning_rate': 4.986666666666667e-05, 'epoch': 0.01}
{'loss': 0.9988, 'grad_norm': 9.98847484588623, 'learning_rate': 4.973333333333334e-05, 'epoch': 0.02}
{'loss': 0.7459, 'grad_norm': 22.904983520507812, 'learning_rate': 4.96e-05, 'epoch': 0.02}
{'loss': 0.9139, 'grad_norm': 8.846088409423828, 'learning_rate': 4.9466666666666665e-05, 'epoch': 0.03}
{'loss': 0.6652, 'grad_norm': 8.97737979888916, 'learning_rate': 4.933333333333334e-05, 'epoch': 0.04}


[W 2024-07-10 22:43:13,893] Trial 0 failed with parameters: {} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\aakha\AppData\Local\Programs\Python\Python310\lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\aakha\AppData\Local\Temp\ipykernel_6816\389994455.py", line 11, in objective
    trainer.train()
  File "c:\Users\aakha\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\trainer.py", line 1932, in train
    return inner_training_loop(
  File "c:\Users\aakha\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\trainer.py", line 2330, in _inner_training_loop
    self.optimizer.step()
  File "c:\Users\aakha\AppData\Local\Programs\Python\Python310\lib\site-packages\accelerate\optimizer.py", line 170, in step
    self.optimizer.step(closure)
  File "c:\Users\aakha\AppData\Local\Programs\Python\Python310\lib\site-packages\to

KeyboardInterrupt: 

In [None]:

# 9. Perform the HPO
best_trial = trainer.hyperparameter_search(
    hp_space=hpo_search_space,
    compute_objective=hpo_compute_objective,
    n_trials=20,
    direction="maximize",
    backend="optuna",
)
print(best_trial)