# Fine-tuning Sentence Transformers

Learn to adapt pre-trained models to your domain.

In [None]:
from sentence_transformers import SentenceTransformer, InputExample, losses
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
from torch.utils.data import DataLoader
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

## Why Fine-tune?

Generic models work well, but fine-tuning improves performance on your specific domain.

In [None]:
# Baseline: the off-the-shelf checkpoint, before any domain adaptation
generic_model = SentenceTransformer('all-MiniLM-L6-v2')

# One domain-specific query and three candidate documents; only the first
# document actually answers the query
query = "API authentication method"
docs = [
    "Use Bearer token in Authorization header for API authentication",  # relevant
    "Annual subscriptions get 20% discount",  # not relevant
    "Python is a programming language",  # not relevant
]

# Encode the query (a single string) and the documents (a list of strings)
query_emb = generic_model.encode(query)
doc_embs = generic_model.encode(docs)

# The query vector is wrapped in a list so it is passed as a one-row batch;
# row 0 of the result holds the query's similarity to each document
similarities = cosine_similarity([query_emb], doc_embs)[0]

print("Generic model similarities:")
for rank, (doc, sim) in enumerate(zip(docs, similarities), start=1):
    # Documents are truncated to 50 characters to keep the listing compact
    print(f"{rank}. [{sim:.3f}] {doc[:50]}...")

print("\n⚠️  Generic model may not capture domain-specific relationships")

## Load Training Data

In [None]:
# Load extended training pairs (100 examples).
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform; without it, open() falls back to the locale's default encoding.
with open('../fixtures/input/training_pairs_extended.json', 'r', encoding='utf-8') as f:
    training_data = json.load(f)

print(f"Loaded {len(training_data)} training examples\n")

# Show the first record; each record is read via its 'query', 'positive'
# and 'negative' keys
example = training_data[0]
print("Training example:")
print(f"  Query: {example['query']}")
print(f"  Positive: {example['positive']}")
print(f"  Negative: {example['negative']}")

## Prepare Training Examples

Convert to InputExample format for sentence-transformers.

In [None]:
# Build one InputExample per record, pairing each query with its positive
# document. Only the 'query' and 'positive' fields of a record are read here.
train_examples = [
    InputExample(texts=[record['query'], record['positive']])
    for record in training_data
]

print(f"Created {len(train_examples)} training examples")
print(f"\nExample format: {train_examples[0].texts}")

## Create DataLoader

In [None]:
# Batch size is defined once so the DataLoader and the printed summary below
# always agree.
batch_size = 16  # Larger batch for 100 examples

# Create DataLoader for batching; shuffle=True reorders the examples each epoch
train_dataloader = DataLoader(
    train_examples,
    shuffle=True,
    batch_size=batch_size,
)

print("DataLoader created:")
print(f"  Batch size: {batch_size}")
print(f"  Number of batches: {len(train_dataloader)}")

## Choose Loss Function

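MultipleNegativesRankingLoss is a strong default for retrieval tasks when the training data consists of (query, relevant document) pairs: for each query, the positives of the other examples in the same batch serve as negatives.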

In [None]:
# Fresh copy of the base checkpoint; this is the instance that gets fine-tuned
model = SentenceTransformer('all-MiniLM-L6-v2')

# The loss is constructed around the model it will train
train_loss = losses.MultipleNegativesRankingLoss(model=model)

# Short summary of the chosen loss
print("\n".join([
    "✓ Loss function: MultipleNegativesRankingLoss",
    "  Uses other batch examples as negatives",
    "  Efficient for retrieval tasks",
]))

## Fine-tune Model

In [None]:
# Training schedule: total steps = batches per epoch x epochs, and warmup
# covers the first 10% of those steps (truncated to an integer)
num_epochs = 3
total_steps = num_epochs * len(train_dataloader)
warmup_steps = int(0.1 * total_steps)

# Summarize the schedule before starting the training run
print(
    "Training configuration:\n"
    f"  Epochs: {num_epochs}\n"
    f"  Total steps: {total_steps}\n"
    f"  Warmup steps: {warmup_steps}"
)

print("\nStarting fine-tuning...\n")

# Arguments for model.fit, gathered in one place
fit_options = {
    # One (dataloader, loss) pair per training objective; only one is used here
    "train_objectives": [(train_dataloader, train_loss)],
    "epochs": num_epochs,
    "warmup_steps": warmup_steps,
    # Directory the fine-tuned model is written to
    "output_path": '../output/fine_tuned_model',
    "show_progress_bar": True,
}
model.fit(**fit_options)

print("\n✓ Fine-tuning complete!")

## Test Fine-tuned Model

In [None]:
# Load the fine-tuned model back from the training output directory
finetuned_model = SentenceTransformer('../output/fine_tuned_model')

# Same query and documents as the baseline check, so the scores of the two
# models can be compared side by side. Only docs[0] is relevant to the query.
query = "API authentication method"
docs = [
    "Use Bearer token in Authorization header for API authentication",
    "Annual subscriptions get 20% discount",
    "Python is a programming language"
]

# Generic model
generic_emb = generic_model.encode(query)
generic_doc_embs = generic_model.encode(docs)
generic_sims = cosine_similarity([generic_emb], generic_doc_embs)[0]

# Fine-tuned model
finetuned_emb = finetuned_model.encode(query)
finetuned_doc_embs = finetuned_model.encode(docs)
finetuned_sims = cosine_similarity([finetuned_emb], finetuned_doc_embs)[0]

print("Comparison:")
# Both listings share one format, so they are printed by the same loop
for label, sims in (("Generic Model", generic_sims), ("Fine-tuned Model", finetuned_sims)):
    print(f"\n{label}:")
    for rank, (doc, sim) in enumerate(zip(docs, sims), start=1):
        print(f"  {rank}. [{sim:.3f}] {doc[:50]}...")

# Change in similarity for the relevant document (index 0)
improvement = finetuned_sims[0] - generic_sims[0]
# ':+.3f' lets the format spec choose the sign, so a regression prints as
# '-0.012' rather than the misleading '+-0.012' a hard-coded '+' would give
print(f"\n✓ Improvement on relevant doc: {improvement:+.3f}")

## Visualize Improvements

In [None]:
import matplotlib.pyplot as plt

# Test on multiple queries: (query, relevant document, irrelevant document)
test_queries = [
    ("password reset", "To reset password click Forgot Password", "Pricing starts at $29"),
    ("pricing info", "Professional plan costs $99/month", "Reset password from settings"),
    ("vacation days", "New employees get 15 vacation days", "API uses Bearer tokens")
]


def _relevance_score(model, query_text, relevant_text):
    """Return the cosine similarity between a query and its relevant document.

    Both texts are embedded with ``model.encode`` and then compared.
    """
    query_vec = model.encode(query_text)
    relevant_vec = model.encode(relevant_text)
    return cosine_similarity([query_vec], [relevant_vec])[0][0]


generic_scores = []
finetuned_scores = []

# Only the relevant document is scored; the irrelevant one is not used by this
# comparison (hence the underscore). The loop variable is named test_query so
# it does not overwrite the notebook-level `query` from earlier cells.
for test_query, relevant, _irrelevant in test_queries:
    generic_scores.append(_relevance_score(generic_model, test_query, relevant))
    finetuned_scores.append(_relevance_score(finetuned_model, test_query, relevant))

# Plot: one pair of side-by-side bars per query
x = np.arange(len(test_queries))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))
# Shift each series by half a bar width so the pair is centered on its tick
ax.bar(x - width/2, generic_scores, width, label='Generic')
ax.bar(x + width/2, finetuned_scores, width, label='Fine-tuned')

ax.set_ylabel('Similarity Score')
ax.set_title('Generic vs Fine-tuned Model Performance')
ax.set_xticks(x)
ax.set_xticklabels([f"Query {i+1}" for i in range(len(test_queries))])
ax.legend()
plt.show()

# Mean per-query difference (fine-tuned minus generic)
avg_improvement = np.mean(np.array(finetuned_scores) - np.array(generic_scores))
# ':+.3f' prints the real sign, so a negative average shows as '-0.012'
# instead of '+-0.012'
print(f"\nAverage improvement: {avg_improvement:+.3f}")

## Save Model

In [None]:
# The model was already written to disk during training (via output_path).
# To save a copy somewhere else:
#   model.save('../output/my_fine_tuned_model')
# To reload it in a later session:
#   loaded_model = SentenceTransformer('../output/fine_tuned_model')

# Path held in one variable and reused in both messages
model_dir = '../output/fine_tuned_model'
print(f"✓ Model saved at: {model_dir}")
print(f"  Use: SentenceTransformer('{model_dir}')")

## Summary

✅ Prepared training data in InputExample format  
✅ Used MultipleNegativesRankingLoss for retrieval  
✅ Fine-tuned model with warmup  
✅ Tested improvements  
✅ Saved model

**Key learnings:**
- Fine-tuning improves domain-specific performance
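- Even a small dataset (~100 examples) can produce a measurable improvement
- MultipleNegativesRankingLoss is a strong default for retrieval when you have (query, positive) pairs
- Learning-rate warmup helps stabilize the early training steps
- More (and more varied) training data generally gives better results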

**Next steps:**
- Try TripletLoss for explicit triplets (notebook 02)
- Experiment with ContrastiveLoss (notebook 03)
- Learn full Transformers Trainer API (notebook 04)
- Compare all approaches (notebook 05)