# Task 1: Fine-tune Sentence Transformer - SOLUTION

In [None]:
from sentence_transformers import SentenceTransformer, InputExample, losses
from torch.utils.data import DataLoader
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load the training records from the JSON fixture. Each record is later read
# through its 'query' and 'positive' keys.
# JSON text is UTF-8, so decode it explicitly rather than relying on the
# platform's default encoding (which is not UTF-8 on every system).
with open('../fixtures/input/training_pairs_hard.json', 'r', encoding='utf-8') as f:
    training_data = json.load(f)

print(f"Loaded {len(training_data)} training pairs")

## Task 1: Prepare Training Examples

In [None]:
# SOLUTION
# Build one InputExample per record, pairing the record's query text with
# its positive text (in that order).
train_examples = [
    InputExample(texts=[record['query'], record['positive']])
    for record in training_data
]

print(f"✓ Created {len(train_examples)} training examples")

## Task 2: Create DataLoader

In [None]:
# SOLUTION
# Wrap the training examples in a shuffling DataLoader that yields batches of 4.
# NOTE(review): the loss used for fine-tuning further down
# (MultipleNegativesRankingLoss) is documented as using the other pairs in a
# batch as negatives, so a batch of 4 gives few negatives per query —
# confirm whether a larger batch_size is affordable.
train_dataloader = DataLoader(train_examples, batch_size=4, shuffle=True)

print("✓ DataLoader created")

## Task 3: Fine-tune Model

In [None]:
# SOLUTION

# Training schedule: warm up over the first 10% of all training batches
# (batches per epoch * number of epochs), truncated to an integer.
num_epochs = 3
warmup_steps = int(0.1 * len(train_dataloader) * num_epochs)

# Base checkpoint to fine-tune, and the ranking loss built around it.
model = SentenceTransformer('all-MiniLM-L6-v2')
train_loss = losses.MultipleNegativesRankingLoss(model)

# Train; the fine-tuned model is written to output_path, from where the
# comparison step reloads it.
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    output_path='../output/fine_tuned_model',
    epochs=num_epochs,
    warmup_steps=warmup_steps,
    show_progress_bar=True,
)

print("✓ Fine-tuning complete")

## Task 4: Compare Performance

In [None]:
# SOLUTION
# Compare the cosine similarity that the base model and the fine-tuned model
# assign to one (query, positive) pair.

# Load models: the untouched base checkpoint and the fine-tuned copy on disk.
generic_model = SentenceTransformer('all-MiniLM-L6-v2')
finetuned_model = SentenceTransformer('../output/fine_tuned_model')

# Test pair.
# NOTE: this is the first *training* record, i.e. a pair the fine-tuned model
# was optimized on, so the measured gain is an optimistic sanity check rather
# than a held-out evaluation.
query = training_data[0]['query']
positive = training_data[0]['positive']

# Generic
generic_query_emb = generic_model.encode(query)
generic_pos_emb = generic_model.encode(positive)
# cosine_similarity takes 2-D inputs and returns a matrix; wrap each vector in
# a list and take the single [0][0] entry.
generic_sim = cosine_similarity([generic_query_emb], [generic_pos_emb])[0][0]

# Fine-tuned
finetuned_query_emb = finetuned_model.encode(query)
finetuned_pos_emb = finetuned_model.encode(positive)
finetuned_sim = cosine_similarity([finetuned_query_emb], [finetuned_pos_emb])[0][0]

# Positive when fine-tuning pulled the pair closer together, negative otherwise.
improvement = finetuned_sim - generic_sim

print(f"Generic similarity: {generic_sim:.3f}")
print(f"Fine-tuned similarity: {finetuned_sim:.3f}")
# Use the '+' sign flag so a regression prints as "-0.012" instead of the
# misleading "+-0.012" that a hard-coded "+" prefix would produce.
print(f"Improvement: {improvement:+.3f}")
print("✓ Task 4 passed")