<a href="https://colab.research.google.com/github/Samin-Sadaf7/Book-of-LLMs/blob/main/HandsOnLLM_10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
!pip install -q accelerate>=0.27.2 peft>=0.9.0 bitsandbytes>=0.43.0 transformers>=4.38.2 trl>=0.7.11 sentencepiece>=0.1.99
!pip install -q sentence-transformers>=3.0.0 mteb>=1.1.2 datasets>=2.18.0

In [None]:
from datasets import load_dataset

# First 50k MNLI training pairs from GLUE, without the bookkeeping "idx" column.
# Label ids: 0 = entailment, 1 = neutral, 2 = contradiction
train_dataset = (
    load_dataset("glue", "mnli", split="train")
    .select(range(50_000))
    .remove_columns("idx")
)

Downloading readme:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/52.2M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.21M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.25M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.22M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.26M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/392702 [00:00<?, ? examples/s]

Generating validation_matched split:   0%|          | 0/9815 [00:00<?, ? examples/s]

Generating validation_mismatched split:   0%|          | 0/9832 [00:00<?, ? examples/s]

Generating test_matched split:   0%|          | 0/9796 [00:00<?, ? examples/s]

Generating test_mismatched split:   0%|          | 0/9847 [00:00<?, ? examples/s]

In [None]:
# Peek at a single training example (premise, hypothesis, label).
train_dataset[2]

{'premise': 'One of our number will carry out your instructions minutely.',
 'hypothesis': 'A member of my team will execute your orders with immense precision.',
 'label': 0}

In [None]:
from sentence_transformers import SentenceTransformer

# Start from the plain bert-base-uncased checkpoint as the base embedding model
embedding_model = SentenceTransformer("bert-base-uncased")



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [None]:
from sentence_transformers import losses

# Softmax loss has to be told the number of labels explicitly (3 for MNLI)
# as well as the size of the sentence embeddings it receives.
train_loss = losses.SoftmaxLoss(
    model=embedding_model,
    num_labels=3,
    sentence_embedding_dimension=embedding_model.get_sentence_embedding_dimension(),
)

In [None]:
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

# STS-B validation split as the similarity benchmark; the gold scores are
# divided by 5 before being handed to the evaluator.
val_sts = load_dataset("glue", "stsb", split="validation")
evaluator = EmbeddingSimilarityEvaluator(
    main_similarity="cosine",
    scores=[score / 5 for score in val_sts["label"]],
    sentences1=val_sts["sentence1"],
    sentences2=val_sts["sentence2"],
)

In [None]:
from sentence_transformers.training_args import SentenceTransformerTrainingArguments

# One epoch, batches of 32, 100 warm-up steps, mixed precision, log every 100 steps.
# NOTE(review): eval_steps is set but no evaluation strategy is configured
# here - confirm whether periodic evaluation during training was intended.
args = SentenceTransformerTrainingArguments(
    output_dir="base_embedding_model",
    num_train_epochs=1,
    warmup_steps=100,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    fp16=True,
    logging_steps=100,
    eval_steps=100,
)

In [None]:
from sentence_transformers.trainer import SentenceTransformerTrainer

# Wire model, data, loss and evaluator together, then run the training loop
trainer = SentenceTransformerTrainer(
    model=embedding_model,
    args=args,
    loss=train_loss,
    train_dataset=train_dataset,
    evaluator=evaluator,
)
trainer.train()

dataset = dataset.select_columns(['hypothesis', 'entailment', 'contradiction'])


Step,Training Loss
100,1.0731
200,0.9523
300,0.8828
400,0.8382
500,0.8221
600,0.8243
700,0.804
800,0.7881
900,0.7681
1000,0.7656


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

TrainOutput(global_step=1563, training_loss=0.8090017200355262, metrics={'train_runtime': 388.8947, 'train_samples_per_second': 128.57, 'train_steps_per_second': 4.019, 'total_flos': 0.0, 'train_loss': 0.8090017200355262, 'epoch': 1.0})

In [None]:
# Score the trained model with the STS-B similarity evaluator.
evaluator(embedding_model)

{'pearson_cosine': 0.5974316810823376, 'spearman_cosine': 0.6583789347813761}

In [None]:
import gc
import torch

# Run Python's garbage collector, then ask PyTorch to release the CUDA memory
# it is holding in its cache.
gc.collect()
torch.cuda.empty_cache()

In [None]:
from mteb import MTEB

# Benchmark on a single MTEB task: Banking77Classification
evaluation = MTEB(tasks=["Banking77Classification"])

# Run the benchmark with the current embedding model and display the results
results = evaluation.run(embedding_model)
results



[TaskResult(task_name=Banking77Classification, scores=...)]

In [None]:
from datasets import Dataset, load_dataset

# Load the first 50k MNLI training pairs from GLUE.
# Label ids: 0 = entailment, 1 = neutral, 2 = contradiction
train_dataset = load_dataset(
    "glue",
    "mnli",
    split="train"
).select(range(50_000))
train_dataset = train_dataset.remove_columns("idx")

# Collapse the three NLI classes into a binary similarity target:
# entailment -> 1.0, neutral / contradiction -> 0.0
mapping = {2: 0, 1: 0, 0: 1}
# list(...) materialises each column so plain Python lists are passed on,
# whatever container type column indexing returns.
train_dataset = Dataset.from_dict({
    "sentence1": list(train_dataset["premise"]),
    "sentence2": list(train_dataset["hypothesis"]),
    "label": [float(mapping[label]) for label in train_dataset["label"]]
})

In [None]:
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

# Similarity benchmark: STS-B validation pairs, gold scores divided by 5
val_sts = load_dataset("glue", "stsb", split="validation")
evaluator = EmbeddingSimilarityEvaluator(
    main_similarity="cosine",
    scores=[score / 5 for score in val_sts["label"]],
    sentences1=val_sts["sentence1"],
    sentences2=val_sts["sentence2"],
)

In [None]:
from sentence_transformers import losses, SentenceTransformer
from sentence_transformers.training_args import SentenceTransformerTrainingArguments
from sentence_transformers.trainer import SentenceTransformerTrainer

# Reload bert-base-uncased so this experiment starts from the base checkpoint
embedding_model = SentenceTransformer("bert-base-uncased")

# Cosine-similarity loss over the sentence pairs
train_loss = losses.CosineSimilarityLoss(model=embedding_model)

# One epoch, batches of 32, 100 warm-up steps, mixed precision, log every 100 steps.
# NOTE(review): eval_steps is set but no evaluation strategy is configured
# here - confirm whether periodic evaluation during training was intended.
args = SentenceTransformerTrainingArguments(
    output_dir="consineloss_embedding_model",
    num_train_epochs=1,
    warmup_steps=100,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    fp16=True,
    logging_steps=100,
    eval_steps=100,
)

# Assemble the trainer and run the training loop
trainer = SentenceTransformerTrainer(
    model=embedding_model,
    args=args,
    loss=train_loss,
    train_dataset=train_dataset,
    evaluator=evaluator,
)
trainer.train()



Step,Training Loss
100,0.2325
200,0.1706
300,0.1724
400,0.1596
500,0.1524
600,0.1584
700,0.1514
800,0.1555
900,0.1482
1000,0.1469


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

TrainOutput(global_step=1563, training_loss=0.1573866608046753, metrics={'train_runtime': 349.5285, 'train_samples_per_second': 143.05, 'train_steps_per_second': 4.472, 'total_flos': 0.0, 'train_loss': 0.1573866608046753, 'epoch': 1.0})

In [None]:
# Score the trained model with the STS-B similarity evaluator
evaluator(embedding_model)

{'pearson_cosine': 0.730035786222801, 'spearman_cosine': 0.7321006653728315}

In [None]:
import random
from tqdm import tqdm
from datasets import Dataset, load_dataset

# Load the first 50k MNLI training pairs from GLUE and keep only the
# entailment pairs (label 0); they provide the (anchor, positive) pairs.
mnli = load_dataset(
    "glue",
    "mnli",
    split="train"
).select(range(50_000))
mnli = mnli.remove_columns("idx")
mnli = mnli.filter(lambda x: x["label"] == 0)

# Materialise the columns once as plain lists. random.shuffle mutates its
# argument in place, so it needs a real, mutable list of its own.
premises = list(mnli["premise"])
hypotheses = list(mnli["hypothesis"])

# Soft negatives: the same hypotheses in shuffled order, so each anchor is
# paired with a randomly chosen hypothesis. A dedicated seeded generator makes
# the triplets identical on every run without touching the global random state.
soft_negatives = hypotheses.copy()
random.Random(42).shuffle(soft_negatives)

# Assemble (anchor, positive, negative) triplets
train_dataset = {
    "anchor": [],
    "positive": [],
    "negative": []
}
for premise, hypothesis, soft_negative in tqdm(
    zip(premises, hypotheses, soft_negatives), total=len(premises)
):
    train_dataset["anchor"].append(premise)
    train_dataset["positive"].append(hypothesis)
    train_dataset["negative"].append(soft_negative)

train_dataset = Dataset.from_dict(train_dataset)

Filter:   0%|          | 0/50000 [00:00<?, ? examples/s]

16875it [00:02, 5636.80it/s]


In [None]:
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

# Similarity benchmark: STS-B validation pairs, gold scores divided by 5
val_sts = load_dataset("glue", "stsb", split="validation")
evaluator = EmbeddingSimilarityEvaluator(
    main_similarity="cosine",
    scores=[score / 5 for score in val_sts["label"]],
    sentences1=val_sts["sentence1"],
    sentences2=val_sts["sentence2"],
)

Downloading data:   0%|          | 0.00/502k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/151k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/114k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5749 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1379 [00:00<?, ? examples/s]

In [None]:
from sentence_transformers import losses, SentenceTransformer
from sentence_transformers.training_args import SentenceTransformerTrainingArguments
from sentence_transformers.trainer import SentenceTransformerTrainer

# Reload bert-base-uncased so this experiment starts from the base checkpoint
embedding_model = SentenceTransformer("bert-base-uncased")

# Multiple-negatives ranking loss over the triplet dataset
train_loss = losses.MultipleNegativesRankingLoss(model=embedding_model)

# One epoch, batches of 32, 100 warm-up steps, mixed precision, log every 100 steps.
# NOTE(review): eval_steps is set but no evaluation strategy is configured
# here - confirm whether periodic evaluation during training was intended.
args = SentenceTransformerTrainingArguments(
    output_dir="mnloss_embedding_model",
    num_train_epochs=1,
    warmup_steps=100,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    fp16=True,
    logging_steps=100,
    eval_steps=100,
)

# Assemble the trainer and run the training loop
trainer = SentenceTransformerTrainer(
    model=embedding_model,
    args=args,
    loss=train_loss,
    train_dataset=train_dataset,
    evaluator=evaluator,
)
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
100,0.3326
200,0.1036
300,0.0762
400,0.069
500,0.074


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

TrainOutput(global_step=528, training_loss=0.12741587672269705, metrics={'train_runtime': 191.7787, 'train_samples_per_second': 87.992, 'train_steps_per_second': 2.753, 'total_flos': 0.0, 'train_loss': 0.12741587672269705, 'epoch': 1.0})

In [None]:
# Score the trained model with the STS-B similarity evaluator
evaluator(embedding_model)

{'pearson_cosine': 0.809092201459816, 'spearman_cosine': 0.8118450833946295}

In [None]:
import gc
import torch

# Run Python's garbage collector, then ask PyTorch to release the CUDA memory
# it is holding in its cache.
gc.collect()
torch.cuda.empty_cache()

In [None]:
from datasets import load_dataset
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

# Load the first 50k MNLI training pairs from GLUE.
# Label ids: 0 = entailment, 1 = neutral, 2 = contradiction
train_dataset = load_dataset(
    "glue",
    "mnli",
    split="train"
).select(range(50_000))

# This dataset is trained with MultipleNegativesRankingLoss, which treats every
# row as an (anchor, positive) pair and does not use a label column. Keep only
# the entailment pairs, otherwise neutral and contradicting hypotheses would be
# pulled towards their premise as if they were positives. The label column has
# no further use afterwards, so it is dropped together with "idx".
train_dataset = train_dataset.filter(lambda row: row["label"] == 0)
train_dataset = train_dataset.remove_columns(["idx", "label"])

# Similarity benchmark: STS-B validation pairs, gold scores divided by 5
val_sts = load_dataset("glue", "stsb", split="validation")
evaluator = EmbeddingSimilarityEvaluator(
    sentences1=val_sts["sentence1"],
    sentences2=val_sts["sentence2"],
    scores=[score / 5 for score in val_sts["label"]],
    main_similarity="cosine"
)

In [None]:
from sentence_transformers import losses, SentenceTransformer
from sentence_transformers.training_args import SentenceTransformerTrainingArguments
from sentence_transformers.trainer import SentenceTransformerTrainer

# Start from the all-MiniLM-L6-v2 sentence-transformers checkpoint
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Multiple-negatives ranking loss
train_loss = losses.MultipleNegativesRankingLoss(model=embedding_model)

# One epoch, batches of 32, 100 warm-up steps, mixed precision, log every 100 steps.
# NOTE(review): eval_steps is set but no evaluation strategy is configured
# here - confirm whether periodic evaluation during training was intended.
args = SentenceTransformerTrainingArguments(
    output_dir="finetuned_embedding_model",
    num_train_epochs=1,
    warmup_steps=100,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    fp16=True,
    logging_steps=100,
    eval_steps=100,
)

# Assemble the trainer and run the training loop
trainer = SentenceTransformerTrainer(
    model=embedding_model,
    args=args,
    loss=train_loss,
    train_dataset=train_dataset,
    evaluator=evaluator,
)
trainer.train()

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

dataset = dataset.select_columns(['hypothesis', 'entailment', 'contradiction'])


Step,Training Loss
100,0.1573
200,0.1105
300,0.1199
400,0.1188
500,0.1083
600,0.1011
700,0.1196
800,0.0986
900,0.1041
1000,0.1052


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

TrainOutput(global_step=1563, training_loss=0.10938328195670745, metrics={'train_runtime': 118.8147, 'train_samples_per_second': 420.823, 'train_steps_per_second': 13.155, 'total_flos': 0.0, 'train_loss': 0.10938328195670745, 'epoch': 1.0})

In [None]:
# Score the trained model with the STS-B similarity evaluator
evaluator(embedding_model)

{'pearson_cosine': 0.8495102776103727, 'spearman_cosine': 0.8489056783655592}

In [None]:
import gc
import torch

# Run Python's garbage collector, then ask PyTorch to release the CUDA memory
# it is holding in its cache.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Augmented SBERT
#   1. Fine-tune a cross-encoder using a small, annotated dataset
#      (gold dataset) --> ground truth
#   2. Create new sentence pairs using the cross-encoder (BERT)
#   3. Label the new sentence pairs with the fine-tuned cross-encoder
#      (silver dataset)
#   4. Train a bi-encoder (SBERT) on the extended dataset
#      (gold + silver dataset)
import pandas as pd
from tqdm import tqdm
from datasets import load_dataset, Dataset
from sentence_transformers import InputExample
from sentence_transformers.datasets import NoDuplicatesDataLoader

In [None]:
# Binary target for the cross-encoder:
# entailment (0) -> 1, neutral (1) / contradiction (2) -> 0
mapping = {2: 0, 1: 0, 0: 1}

# Gold dataset: the first 10,000 MNLI training pairs
dataset = load_dataset("glue", "mnli", split="train").select(range(10_000))

# Wrap every pair in an InputExample and batch them for cross-encoder training
gold_examples = [
    InputExample(label=mapping[row["label"]], texts=[row["premise"], row["hypothesis"]])
    for row in tqdm(dataset)
]
gold_dataloader = NoDuplicatesDataLoader(gold_examples, batch_size=16)

100%|██████████| 10000/10000 [00:00<00:00, 22130.21it/s]


In [None]:
# Same gold data as a pandas DataFrame for easier handling
gold = pd.DataFrame({
    "sentence1": dataset["premise"],
    "sentence2": dataset["hypothesis"],
    "label": [mapping[label_id] for label_id in dataset["label"]],
})

In [None]:
from sentence_transformers.cross_encoder import CrossEncoder

# Two-class cross-encoder on top of bert-base-uncased, trained for one epoch
# on the gold dataloader
cross_encoder = CrossEncoder("bert-base-uncased", num_labels=2)
cross_encoder.fit(
    train_dataloader=gold_dataloader,
    epochs=1,
    warmup_steps=100,
    use_amp=False,
    show_progress_bar=True,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/625 [00:00<?, ?it/s]

In [None]:
# Silver candidates: MNLI training rows 10,000-49,999, whose labels will be
# predicted by the cross-encoder
silver = load_dataset("glue", "mnli", split="train").select(range(10_000, 50_000))

# (premise, hypothesis) tuples in the form the cross-encoder scores
pairs = [
    (premise, hypothesis)
    for premise, hypothesis in zip(silver["premise"], silver["hypothesis"])
]

In [None]:
import numpy as np

# Label the sentence pairs using our fine-tuned cross-encoder
output = cross_encoder.predict(
    pairs,
    apply_softmax=True,
    show_progress_bar=True
)

# Build the frame from `pairs` rather than from `silver` itself: this cell
# rebinds `silver` to a DataFrame without premise/hypothesis columns, so reading
# them back from `silver` would raise a KeyError when the cell is run again.
silver = pd.DataFrame(
    {
        "sentence1": [premise for premise, _ in pairs],
        "sentence2": [hypothesis for _, hypothesis in pairs],
        # Predicted class = index of the highest softmax probability per pair
        "label": np.argmax(output, axis=1)
    }
)

Batches:   0%|          | 0/1250 [00:00<?, ?it/s]

In [None]:
# Stack gold on top of silver and keep the first occurrence of each sentence
# pair, then convert the frame into a Dataset for training
data = (
    pd.concat([gold, silver], axis=0, ignore_index=True)
    .drop_duplicates(subset=["sentence1", "sentence2"], keep="first")
)
train_dataset = Dataset.from_pandas(data, preserve_index=False)

In [None]:
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

# Similarity benchmark: STS-B validation pairs, gold scores divided by 5
val_sts = load_dataset("glue", "stsb", split="validation")
evaluator = EmbeddingSimilarityEvaluator(
    main_similarity="cosine",
    scores=[score / 5 for score in val_sts["label"]],
    sentences1=val_sts["sentence1"],
    sentences2=val_sts["sentence2"],
)

In [None]:
from sentence_transformers import losses, SentenceTransformer
from sentence_transformers.training_args import SentenceTransformerTrainingArguments
from sentence_transformers.trainer import SentenceTransformerTrainer

# Reload bert-base-uncased so this experiment starts from the base checkpoint
embedding_model = SentenceTransformer("bert-base-uncased")

# Cosine-similarity loss over the sentence pairs
train_loss = losses.CosineSimilarityLoss(model=embedding_model)

# One epoch, batches of 32, 100 warm-up steps, mixed precision, log every 100 steps.
# NOTE(review): eval_steps is set but no evaluation strategy is configured
# here - confirm whether periodic evaluation during training was intended.
args = SentenceTransformerTrainingArguments(
    output_dir="augmented_embedding_model",
    num_train_epochs=1,
    warmup_steps=100,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    fp16=True,
    logging_steps=100,
    eval_steps=100,
)

# Assemble the trainer and run the training loop
trainer = SentenceTransformerTrainer(
    model=embedding_model,
    args=args,
    loss=train_loss,
    train_dataset=train_dataset,
    evaluator=evaluator,
)
trainer.train()



Step,Training Loss
100,0.222
200,0.1594
300,0.1462
400,0.1445
500,0.1426
600,0.137
700,0.1345
800,0.1354
900,0.1337
1000,0.1334


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

TrainOutput(global_step=1563, training_loss=0.1425663416612934, metrics={'train_runtime': 356.8528, 'train_samples_per_second': 140.108, 'train_steps_per_second': 4.38, 'total_flos': 0.0, 'train_loss': 0.1425663416612934, 'epoch': 1.0})

In [None]:
# Score the trained model with the STS-B similarity evaluator
evaluator(embedding_model)

{'pearson_cosine': 0.7130269800960213, 'spearman_cosine': 0.7206153869840849}

In [None]:
# Download NLTK's 'punkt_tab' tokenizer data
# NOTE(review): presumably required by the denoising dataset built in the
# following cells - confirm.
import nltk
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
from tqdm import tqdm
from datasets import Dataset, load_dataset
from sentence_transformers.datasets import DenoisingAutoEncoderDataset

# Create a flat list of sentences from the first 25k MNLI training rows.
# list(...) materialises each column so `+` is guaranteed to be list
# concatenation, whatever container type column indexing returns.
mnli = load_dataset("glue", "mnli", split="train").select(range(25_000))
flat_sentences = list(mnli["premise"]) + list(mnli["hypothesis"])

# De-duplicate while keeping first-seen order. A set would also remove the
# duplicates, but its iteration order for strings can differ between runs,
# which would reorder the training data each time.
unique_sentences = list(dict.fromkeys(flat_sentences))

# Add noise to our input data
# NOTE(review): the noise itself is presumably still random on every run -
# seed the relevant generators if exact reproducibility is required.
damaged_data = DenoisingAutoEncoderDataset(unique_sentences)

# Create dataset: texts[0] is the damaged sentence, texts[1] the original
train_dataset = {"damaged_sentence": [], "original_sentence": []}
for example in tqdm(damaged_data):
    train_dataset["damaged_sentence"].append(example.texts[0])
    train_dataset["original_sentence"].append(example.texts[1])
train_dataset = Dataset.from_dict(train_dataset)

100%|██████████| 48353/48353 [00:12<00:00, 3875.94it/s]


In [None]:
# Peek at one (damaged sentence, original sentence) training pair.
train_dataset[0]

{'damaged_sentence': 'consider the truth by the anti-tobacconists.',
 'original_sentence': 'But consider the evil done to the truth by the good anti-tobacconists.'}

In [None]:
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

# Create an embedding similarity evaluator for STS-B.
# The gold scores are divided by 5, exactly as in every other evaluator in
# this notebook, so all experiments are scored against the same targets.
val_sts = load_dataset("glue", "stsb", split="validation")
evaluator = EmbeddingSimilarityEvaluator(
    sentences1=val_sts["sentence1"],
    sentences2=val_sts["sentence2"],
    scores=[score / 5 for score in val_sts["label"]],
    main_similarity="cosine",
)

In [None]:
from sentence_transformers import models, SentenceTransformer

# Assemble the embedding model by hand: a bert-base-uncased transformer
# followed by a pooling layer in "cls" mode
word_embedding_model = models.Transformer("bert-base-uncased")
pooling_model = models.Pooling(
    word_embedding_dimension=word_embedding_model.get_word_embedding_dimension(),
    pooling_mode="cls",
)
embedding_model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

In [None]:
from sentence_transformers import losses

# Use the denoising auto-encoder loss, with encoder and decoder tied
train_loss = losses.DenoisingAutoEncoderLoss(
    embedding_model, tie_encoder_decoder=True
)
# Move the decoder to the device the embedding model lives on. A hard-coded
# "cuda" would raise on a machine without a GPU.
train_loss.decoder = train_loss.decoder.to(embedding_model.device)

Some weights of BertLMHeadModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.encoder.layer.0.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.0.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.0.crossattention.output.dense.bias', 'bert.encoder.layer.0.crossattention.output.dense.weight', 'bert.encoder.layer.0.crossattention.self.key.bias', 'bert.encoder.layer.0.crossattention.self.key.weight', 'bert.encoder.layer.0.crossattention.self.query.bias', 'bert.encoder.layer.0.crossattention.self.query.weight', 'bert.encoder.layer.0.crossattention.self.value.bias', 'bert.encoder.layer.0.crossattention.self.value.weight', 'bert.encoder.layer.1.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.1.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.1.crossattention.output.dense.bias', 'bert.encoder.layer.1.crossattention.output.dense.weight', 'bert.encoder.layer.1.crossattention.self.key.bias', 'bert.e

In [None]:
from sentence_transformers.training_args import SentenceTransformerTrainingArguments
from sentence_transformers.trainer import SentenceTransformerTrainer

# One epoch, batches of 16, 100 warm-up steps, mixed precision, log every 100 steps.
# NOTE(review): eval_steps is set but no evaluation strategy is configured
# here - confirm whether periodic evaluation during training was intended.
args = SentenceTransformerTrainingArguments(
    output_dir="denoising_embedding_model",
    num_train_epochs=1,
    warmup_steps=100,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    fp16=True,
    logging_steps=100,
    eval_steps=100,
)

# Assemble the trainer and run the training loop
trainer = SentenceTransformerTrainer(
    model=embedding_model,
    args=args,
    loss=train_loss,
    train_dataset=train_dataset,
    evaluator=evaluator,
)
trainer.train()

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


Step,Training Loss
100,6.7219
200,4.8226
300,4.5838
400,4.4275
500,4.3571
600,4.2535
700,4.1709
800,4.1511
900,4.1107
1000,4.0177


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

TrainOutput(global_step=3023, training_loss=4.012064546060767, metrics={'train_runtime': 952.9455, 'train_samples_per_second': 50.741, 'train_steps_per_second': 3.172, 'total_flos': 0.0, 'train_loss': 4.012064546060767, 'epoch': 1.0})

In [None]:
# Score the trained model with the STS-B similarity evaluator
evaluator(embedding_model)

{'pearson_cosine': 0.7416102463259606, 'spearman_cosine': 0.7486543814230622}

In [None]:
import gc
import torch

# Run Python's garbage collector, then ask PyTorch to release the CUDA memory
# it is holding in its cache.
gc.collect()
torch.cuda.empty_cache()