# Fine-Tune an Embedding Model

Fine-tune an embedding model from the Hugging Face Hub using the Sentence Transformers library.

In [None]:
!pip3 install sentence-transformers==3.0.1 torch==2.2.2

In [None]:
import torch
import sentence_transformers

# Report the library versions this notebook is running against.
print("torch version:", torch.__version__)
print("sentence transformers version:", sentence_transformers.__version__)

# PyTorch's MPS allocator reads this setting from the process environment, so a
# plain Python variable has no effect on its own. The variable is kept for
# backward compatibility and exported so the allocator can see it.
# A ratio of 0.0 disables the allocator's high-watermark memory limit.
# NOTE(review): run this cell before any tensor is placed on the MPS device.
import os

PYTORCH_MPS_HIGH_WATERMARK_RATIO = 0.0
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = str(PYTORCH_MPS_HIGH_WATERMARK_RATIO)

In [None]:
import os
# Expose no CUDA devices to this process so the notebook does not run on an
# NVIDIA GPU.
# NOTE(review): presumably only effective if CUDA has not been initialised yet.
os.environ.update({"CUDA_VISIBLE_DEVICES": ""})


In [None]:
from sentence_transformers import SentenceTransformer, InputExample, losses, util
from torch.utils.data import DataLoader

# Load the pretrained embedding model that the rest of this cell fine-tunes.
# trust_remote_code=True allows the checkpoint's own Python modelling code to
# run locally, so only enable it for repositories you trust.
model = SentenceTransformer(
    model_name_or_path="Alibaba-NLP/gte-base-en-v1.5",
    trust_remote_code=True,
)

# Function to print similarity score
def get_similarity_score(
    sentence1: str = "I love the taste of fresh apples.",
    sentence2: str = "Apples are rich in vitamins and fiber.",
):
    """Print and return the cosine similarity between two sentences.

    Both sentences are embedded with the notebook-level ``model``, so calling
    this before and after fine-tuning shows how training changed the score
    for the same pair.

    Args:
        sentence1: First sentence to embed. Defaults to the notebook's
            original probe sentence.
        sentence2: Second sentence to embed. Defaults to the notebook's
            original probe sentence.

    Returns:
        The single-element tensor produced by ``util.cos_sim``; the printed
        number is its ``.item()`` value.
    """
    # Encode each sentence on its own (not as a batch) so the numbers match
    # what this helper has always produced.
    embedding1 = model.encode(sentence1)
    embedding2 = model.encode(sentence2)
    cosine_score = util.cos_sim(embedding1, embedding2)
    score_number = cosine_score.item()
    print(f"Cosine similarity between '{sentence1}' and '{sentence2}': {score_number:.4f}")
    # Return the tensor (not the float) so callers can subtract two results.
    return cosine_score

# Record the baseline similarity using the pretrained weights.
print("Before training:")
similarity_before = get_similarity_score()

# Each example is an (anchor, positive, negative) triplet: the positive shares
# the anchor's food sense, while the negative only shares a surface word.
train_examples = [
    InputExample(texts=[anchor, positive, negative])
    for anchor, positive, negative in (
        (
            "I love eating apples.",
            "Apples are my favorite fruit",
            "Apple is a tech company",
        ),
        (
            "Chocolate is a sweet treat loved by many.",
            "I can't resist a good piece of chocolate.",
            "Chocolate Rain was one of the most popular songs on YouTube from 2007.",
        ),
        (
            "Ice cream is a refreshing dessert.",
            "I love trying different ice cream flavors.",
            "The rapper and actor Ice Cube was wearing a cream colored suit to the VMAs.",
        ),
        (
            "Salad is a healthy meal option.",
            "I love a fresh, crisp salad with various vegetables.",
            "Salad Fingers is a surreal web series created by David Firth.",
        ),
    )
]
train_dataloader = DataLoader(train_examples, batch_size=8, shuffle=True)
train_loss = losses.TripletLoss(model=model)

# Fine-tune the model on the triplet objective for 10 epochs.
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=10,
)

# Score the same sentence pair again, now with the fine-tuned weights.
print("After training:")
similarity_after = get_similarity_score()

# Both scores are single-element tensors, so the subtraction yields a tensor;
# .item() extracts the float for formatting.
similarity_difference = similarity_after - similarity_before
# ".4f" gives four decimal places, matching the scores printed above. The
# earlier "4f" spec only set a minimum width and printed six decimals.
print(f"Change in similarity score: {similarity_difference.item():.4f}")
