# Fine-Tune "sentence-transformers/paraphrase-MiniLM-L3-v2" Sentence Transformer

In [152]:
modelName= "sentence-transformers/paraphrase-MiniLM-L3-v2"

In [153]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
import torch
from torch import nn, optim
from sentence_transformers import SentenceTransformer, models, losses, InputExample, models
from sentence_transformers.evaluation import LabelAccuracyEvaluator
from transformers import AutoTokenizer
from tqdm import tqdm
from sklearn.metrics.pairwise import cosine_similarity

In [154]:
df = pd.read_csv("datasets/resume.csv")  # Replace with actual path
df = df[["Resume_str", "Category"]]

label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['Category'])

texts = df['Resume_str'].tolist()
labels = df['label'].tolist()

In [155]:
class ResumeDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.tokenized = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {
            "input_ids": self.tokenized["input_ids"][idx],
            "attention_mask": self.tokenized["attention_mask"][idx],
            "label": self.labels[idx]
        }

tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")
dataset = ResumeDataset(texts, labels, tokenizer)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

In [156]:
# Step 1: Build SentenceTransformer-style base model
word_embedding_model = models.Transformer(modelName)
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
base_model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

# Step 2: Define classification head
class SentenceClassifier(nn.Module):
    def __init__(self, base_model, num_classes):
        super(SentenceClassifier, self).__init__()
        self.base_model = base_model  # SentenceTransformer model
        self.classifier = nn.Sequential(
            nn.Linear(base_model.get_sentence_embedding_dimension(), 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, features):
        # Allow gradients for fine-tuning base model
        embeddings = self.base_model.forward(features)['sentence_embedding']
        return self.classifier(embeddings)

# Step 3: Instantiate classifier model
model = SentenceClassifier(base_model, num_classes=len(label_encoder.classes_))

In [157]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# Early Stopping Setup
best_loss = float('inf')
patience = 6
counter = 0

# Training config
baseEpochs = 200
device = torch.device('cuda' if torch.cuda.is_available else 'mps' if torch.mps.is_available else 'cpu')
print("Using device:", device)
model = model.to(device)

# Freeze base model initially
for param in model.base_model.parameters():
    param.requires_grad = False

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=5e-4)

# Learning Rate Scheduler (optional for frozen phase)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5, verbose=True)

torch.save(model.state_dict(), "models/best_model.pt")
for epoch in range(baseEpochs):
    model.load_state_dict(torch.load("models/best_model.pt"))
    model.train()
    total_loss = 0
    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}", leave=False)

    for batch in progress_bar:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = torch.tensor(batch["label"]).to(device)

        features = {"input_ids": input_ids, "attention_mask": attention_mask}
        outputs = model(features)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        total_loss += loss.item()

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1} complete. Avg Loss: {avg_loss:.4f}")

    # Step the scheduler
    scheduler.step(avg_loss)

    # Early Stopping Check
    if avg_loss < best_loss:
        best_loss = avg_loss
        counter = 0
        print("‚úÖ Improvement detected ‚Äî saving model")
        torch.save(model.state_dict(), "models/best_model.pt")
    else:
        counter += 1
        print(f"‚ö†Ô∏è No improvement. Patience left: {patience - counter}")
        if counter >= patience:
            print("‚èπÔ∏è Early stopping triggered.")
            break

Using device: cuda


  labels = torch.tensor(batch["label"]).to(device)


Epoch 1 complete. Avg Loss: 3.1331
‚úÖ Improvement detected ‚Äî saving model




Epoch 2 complete. Avg Loss: 3.0254
‚úÖ Improvement detected ‚Äî saving model




Epoch 3 complete. Avg Loss: 2.8772
‚úÖ Improvement detected ‚Äî saving model




Epoch 4 complete. Avg Loss: 2.6860
‚úÖ Improvement detected ‚Äî saving model




Epoch 5 complete. Avg Loss: 2.5052
‚úÖ Improvement detected ‚Äî saving model




Epoch 6 complete. Avg Loss: 2.3578
‚úÖ Improvement detected ‚Äî saving model




Epoch 7 complete. Avg Loss: 2.2188
‚úÖ Improvement detected ‚Äî saving model




Epoch 8 complete. Avg Loss: 2.1409
‚úÖ Improvement detected ‚Äî saving model




Epoch 9 complete. Avg Loss: 2.0579
‚úÖ Improvement detected ‚Äî saving model




Epoch 10 complete. Avg Loss: 1.9896
‚úÖ Improvement detected ‚Äî saving model




Epoch 11 complete. Avg Loss: 1.9407
‚úÖ Improvement detected ‚Äî saving model




Epoch 12 complete. Avg Loss: 1.8808
‚úÖ Improvement detected ‚Äî saving model




Epoch 13 complete. Avg Loss: 1.8279
‚úÖ Improvement detected ‚Äî saving model




Epoch 14 complete. Avg Loss: 1.7950
‚úÖ Improvement detected ‚Äî saving model




Epoch 15 complete. Avg Loss: 1.7529
‚úÖ Improvement detected ‚Äî saving model




Epoch 16 complete. Avg Loss: 1.7184
‚úÖ Improvement detected ‚Äî saving model




Epoch 17 complete. Avg Loss: 1.6709
‚úÖ Improvement detected ‚Äî saving model




Epoch 18 complete. Avg Loss: 1.6546
‚úÖ Improvement detected ‚Äî saving model




Epoch 19 complete. Avg Loss: 1.6297
‚úÖ Improvement detected ‚Äî saving model




Epoch 20 complete. Avg Loss: 1.5981
‚úÖ Improvement detected ‚Äî saving model




Epoch 21 complete. Avg Loss: 1.5661
‚úÖ Improvement detected ‚Äî saving model




Epoch 22 complete. Avg Loss: 1.5523
‚úÖ Improvement detected ‚Äî saving model




Epoch 23 complete. Avg Loss: 1.5400
‚úÖ Improvement detected ‚Äî saving model




Epoch 24 complete. Avg Loss: 1.4940
‚úÖ Improvement detected ‚Äî saving model




Epoch 25 complete. Avg Loss: 1.4840
‚úÖ Improvement detected ‚Äî saving model




Epoch 26 complete. Avg Loss: 1.4770
‚úÖ Improvement detected ‚Äî saving model




Epoch 27 complete. Avg Loss: 1.4532
‚úÖ Improvement detected ‚Äî saving model




Epoch 28 complete. Avg Loss: 1.4271
‚úÖ Improvement detected ‚Äî saving model




Epoch 29 complete. Avg Loss: 1.4119
‚úÖ Improvement detected ‚Äî saving model




Epoch 30 complete. Avg Loss: 1.4054
‚úÖ Improvement detected ‚Äî saving model




Epoch 31 complete. Avg Loss: 1.3969
‚úÖ Improvement detected ‚Äî saving model




Epoch 32 complete. Avg Loss: 1.3549
‚úÖ Improvement detected ‚Äî saving model




Epoch 33 complete. Avg Loss: 1.3389
‚úÖ Improvement detected ‚Äî saving model




Epoch 34 complete. Avg Loss: 1.3322
‚úÖ Improvement detected ‚Äî saving model




Epoch 35 complete. Avg Loss: 1.3178
‚úÖ Improvement detected ‚Äî saving model




Epoch 36 complete. Avg Loss: 1.3169
‚úÖ Improvement detected ‚Äî saving model




Epoch 37 complete. Avg Loss: 1.3072
‚úÖ Improvement detected ‚Äî saving model




Epoch 38 complete. Avg Loss: 1.2825
‚úÖ Improvement detected ‚Äî saving model




Epoch 39 complete. Avg Loss: 1.2527
‚úÖ Improvement detected ‚Äî saving model




Epoch 40 complete. Avg Loss: 1.2541
‚ö†Ô∏è No improvement. Patience left: 5




Epoch 41 complete. Avg Loss: 1.2560
‚ö†Ô∏è No improvement. Patience left: 4




Epoch 42 complete. Avg Loss: 1.2557
‚ö†Ô∏è No improvement. Patience left: 3




Epoch 43 complete. Avg Loss: 1.2504
‚úÖ Improvement detected ‚Äî saving model




Epoch 44 complete. Avg Loss: 1.2454
‚úÖ Improvement detected ‚Äî saving model




Epoch 45 complete. Avg Loss: 1.2170
‚úÖ Improvement detected ‚Äî saving model




Epoch 46 complete. Avg Loss: 1.2457
‚ö†Ô∏è No improvement. Patience left: 5




Epoch 47 complete. Avg Loss: 1.2229
‚ö†Ô∏è No improvement. Patience left: 4




Epoch 48 complete. Avg Loss: 1.2343
‚ö†Ô∏è No improvement. Patience left: 3




Epoch 49 complete. Avg Loss: 1.2478
‚ö†Ô∏è No improvement. Patience left: 2




Epoch 50 complete. Avg Loss: 1.2177
‚ö†Ô∏è No improvement. Patience left: 1




Epoch 51 complete. Avg Loss: 1.1972
‚úÖ Improvement detected ‚Äî saving model




Epoch 52 complete. Avg Loss: 1.1877
‚úÖ Improvement detected ‚Äî saving model




Epoch 53 complete. Avg Loss: 1.1874
‚úÖ Improvement detected ‚Äî saving model




Epoch 54 complete. Avg Loss: 1.1764
‚úÖ Improvement detected ‚Äî saving model




Epoch 55 complete. Avg Loss: 1.1860
‚ö†Ô∏è No improvement. Patience left: 5




Epoch 56 complete. Avg Loss: 1.1720
‚úÖ Improvement detected ‚Äî saving model




Epoch 57 complete. Avg Loss: 1.1758
‚ö†Ô∏è No improvement. Patience left: 5




Epoch 58 complete. Avg Loss: 1.1710
‚úÖ Improvement detected ‚Äî saving model




Epoch 59 complete. Avg Loss: 1.1544
‚úÖ Improvement detected ‚Äî saving model




Epoch 60 complete. Avg Loss: 1.1637
‚ö†Ô∏è No improvement. Patience left: 5




Epoch 61 complete. Avg Loss: 1.1567
‚ö†Ô∏è No improvement. Patience left: 4




Epoch 62 complete. Avg Loss: 1.1705
‚ö†Ô∏è No improvement. Patience left: 3




Epoch 63 complete. Avg Loss: 1.1535
‚úÖ Improvement detected ‚Äî saving model




Epoch 64 complete. Avg Loss: 1.1481
‚úÖ Improvement detected ‚Äî saving model




Epoch 65 complete. Avg Loss: 1.1405
‚úÖ Improvement detected ‚Äî saving model




Epoch 66 complete. Avg Loss: 1.1336
‚úÖ Improvement detected ‚Äî saving model




Epoch 67 complete. Avg Loss: 1.1333
‚úÖ Improvement detected ‚Äî saving model




Epoch 68 complete. Avg Loss: 1.1217
‚úÖ Improvement detected ‚Äî saving model




Epoch 69 complete. Avg Loss: 1.1330
‚ö†Ô∏è No improvement. Patience left: 5




Epoch 70 complete. Avg Loss: 1.1214
‚úÖ Improvement detected ‚Äî saving model




Epoch 71 complete. Avg Loss: 1.1170
‚úÖ Improvement detected ‚Äî saving model




Epoch 72 complete. Avg Loss: 1.1095
‚úÖ Improvement detected ‚Äî saving model




Epoch 73 complete. Avg Loss: 1.1194
‚ö†Ô∏è No improvement. Patience left: 5




Epoch 74 complete. Avg Loss: 1.1100
‚ö†Ô∏è No improvement. Patience left: 4




Epoch 75 complete. Avg Loss: 1.1135
‚ö†Ô∏è No improvement. Patience left: 3




Epoch 76 complete. Avg Loss: 1.1123
‚ö†Ô∏è No improvement. Patience left: 2




Epoch 77 complete. Avg Loss: 1.1133
‚ö†Ô∏è No improvement. Patience left: 1




Epoch 78 complete. Avg Loss: 1.0986
‚úÖ Improvement detected ‚Äî saving model




Epoch 79 complete. Avg Loss: 1.1145
‚ö†Ô∏è No improvement. Patience left: 5




Epoch 80 complete. Avg Loss: 1.0803
‚úÖ Improvement detected ‚Äî saving model




Epoch 81 complete. Avg Loss: 1.0879
‚ö†Ô∏è No improvement. Patience left: 5




Epoch 82 complete. Avg Loss: 1.0824
‚ö†Ô∏è No improvement. Patience left: 4




Epoch 83 complete. Avg Loss: 1.0949
‚ö†Ô∏è No improvement. Patience left: 3




Epoch 84 complete. Avg Loss: 1.1000
‚ö†Ô∏è No improvement. Patience left: 2




Epoch 85 complete. Avg Loss: 1.0752
‚úÖ Improvement detected ‚Äî saving model




Epoch 86 complete. Avg Loss: 1.0779
‚ö†Ô∏è No improvement. Patience left: 5




Epoch 87 complete. Avg Loss: 1.0872
‚ö†Ô∏è No improvement. Patience left: 4




Epoch 88 complete. Avg Loss: 1.0792
‚ö†Ô∏è No improvement. Patience left: 3




Epoch 89 complete. Avg Loss: 1.0928
‚ö†Ô∏è No improvement. Patience left: 2




Epoch 90 complete. Avg Loss: 1.0853
‚ö†Ô∏è No improvement. Patience left: 1


                                                           

Epoch 91 complete. Avg Loss: 1.0804
‚ö†Ô∏è No improvement. Patience left: 0
‚èπÔ∏è Early stopping triggered.




In [158]:
tuneEpochs = 20

# üîì Unfreeze base model
for param in model.base_model.parameters():
    param.requires_grad = True

# üîÅ New optimizer & scheduler for fine-tuning
optimizer = optim.AdamW(model.parameters(), lr=2e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5, verbose=True)

# Early stopping state
tune_best_loss = float('inf')
tune_counter = 0
tune_patience = 6

for epoch in range(baseEpochs, baseEpochs + tuneEpochs):
    model.load_state_dict(torch.load("models/best_model.pt"))
    model.train()
    total_loss = 0
    progress_bar = tqdm(dataloader, desc=f"Tune Epoch {epoch+1}", leave=False)

    for batch in progress_bar:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = torch.tensor(batch["label"]).to(device)

        features = {"input_ids": input_ids, "attention_mask": attention_mask}
        outputs = model(features)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()

    avg_loss = total_loss / len(dataloader)
    print(f"üõ†Ô∏è Epoch {epoch+1} complete. Avg Loss: {avg_loss:.4f}")

    scheduler.step(avg_loss)

    # Early stopping
    if avg_loss < tune_best_loss:
        tune_best_loss = avg_loss
        tune_counter = 0
        print("‚úÖ Improvement detected ‚Äî saving model")
        torch.save(model.state_dict(), "models/best_model.pt")
    else:
        tune_counter += 1
        print(f"‚ö†Ô∏è No improvement. Patience left: {tune_patience - tune_counter}")
        if tune_counter >= tune_patience:
            print("‚èπÔ∏è Early stopping triggered.")
            break

  labels = torch.tensor(batch["label"]).to(device)


üõ†Ô∏è Epoch 201 complete. Avg Loss: 1.0300
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 202 complete. Avg Loss: 0.7516
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 203 complete. Avg Loss: 0.6303
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 204 complete. Avg Loss: 0.5337
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 205 complete. Avg Loss: 0.4718
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 206 complete. Avg Loss: 0.3731
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 207 complete. Avg Loss: 0.3211
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 208 complete. Avg Loss: 0.2693
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 209 complete. Avg Loss: 0.2142
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 210 complete. Avg Loss: 0.1716
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 211 complete. Avg Loss: 0.1364
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 212 complete. Avg Loss: 0.1457
‚ö†Ô∏è No improvement. Patience left: 5




üõ†Ô∏è Epoch 213 complete. Avg Loss: 0.1516
‚ö†Ô∏è No improvement. Patience left: 4




üõ†Ô∏è Epoch 214 complete. Avg Loss: 0.1280
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 215 complete. Avg Loss: 0.0994
‚úÖ Improvement detected ‚Äî saving model




üõ†Ô∏è Epoch 216 complete. Avg Loss: 0.1046
‚ö†Ô∏è No improvement. Patience left: 5




üõ†Ô∏è Epoch 217 complete. Avg Loss: 0.1034
‚ö†Ô∏è No improvement. Patience left: 4




üõ†Ô∏è Epoch 218 complete. Avg Loss: 0.1032
‚ö†Ô∏è No improvement. Patience left: 3




üõ†Ô∏è Epoch 219 complete. Avg Loss: 0.1047
‚ö†Ô∏è No improvement. Patience left: 2




üõ†Ô∏è Epoch 220 complete. Avg Loss: 0.0799
‚úÖ Improvement detected ‚Äî saving model


In [162]:
model.load_state_dict(torch.load("models/best_model.pt"))
torch.save(model.base_model.state_dict(), "models/model2.pt")

# TEST

In [163]:
encoder = SentenceTransformer(modelName)
resume = "Master's in Computer Science"
jd = "Bachelor's in Healthcare or related fields"
resumeEmbeddings = encoder.encode([resume])
jdEmbeddings = encoder.encode([jd])
similarity = cosine_similarity(resumeEmbeddings, jdEmbeddings)[0][0]
print("Similarity : ", similarity)

Similarity :  0.2797977


In [164]:
encoder = SentenceTransformer(modelName)
encoder.load_state_dict(torch.load("models/model2.pt"))
resume = "Master's in Computer Science"
jd = "Bachelor's in Healthcare or related fields"
resumeEmbeddings = encoder.encode([resume])
jdEmbeddings = encoder.encode([jd])
similarity = cosine_similarity(resumeEmbeddings, jdEmbeddings)[0][0]
print("Similarity : ", similarity)

Similarity :  0.3099702
